Update the infiniband stack to Mellanox's OFED version 2.1.

Highlights:
 - Multiple verbs API updates
 - Support for RoCE, RDMA over ethernet

All hardware drivers depending on the common infiniband stack have been
updated as well.

Discussed with:	np @
Sponsored by:	Mellanox Technologies
MFC after:	1 month
This commit is contained in:
Hans Petter Selasky 2015-02-17 08:40:27 +00:00
parent fa592170fe
commit b5c1e0cb8d
82 changed files with 11437 additions and 7007 deletions

View File

@ -3624,19 +3624,16 @@ ofed/drivers/infiniband/core/fmr_pool.c optional ofed \
ofed/drivers/infiniband/core/iwcm.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/local_sa.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/mad_rmpp.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/multicast.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/notice.c optional ofed \
ofed/drivers/infiniband/core/packer.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/packer.c optional ofed \
ofed/drivers/infiniband/core/peer_mem.c optional ofed \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
ofed/drivers/infiniband/core/sa_query.c optional ofed \
@ -3741,6 +3738,9 @@ ofed/drivers/infiniband/hw/mlx4/mad.c optional mlx4ib \
ofed/drivers/infiniband/hw/mlx4/main.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c optional mlx4ib \
no-depend \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/mr.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"

View File

@ -525,7 +525,7 @@ static void krping_setup_wr(struct krping_cb *cb)
case MW:
cb->bind_attr.wr_id = 0xabbaabba;
cb->bind_attr.send_flags = 0; /* unsignaled */
cb->bind_attr.length = cb->size;
cb->bind_attr.bind_info.length = cb->size;
break;
default:
break;
@ -627,7 +627,7 @@ static int krping_setup_buffers(struct krping_cb *cb)
cb->page_list, cb->page_list_len);
break;
case MW:
cb->mw = ib_alloc_mw(cb->pd);
cb->mw = ib_alloc_mw(cb->pd,IB_MW_TYPE_1);
if (IS_ERR(cb->mw)) {
DEBUG_LOG(cb, "recv_buf alloc_mw failed\n");
ret = PTR_ERR(cb->mw);
@ -898,15 +898,15 @@ static u32 krping_rdma_rkey(struct krping_cb *cb, u64 buf, int post_inv)
* Update the MW with new buf info.
*/
if (buf == (u64)cb->start_dma_addr) {
cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_READ;
cb->bind_attr.mr = cb->start_mr;
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_READ;
cb->bind_attr.bind_info.mr = cb->start_mr;
} else {
cb->bind_attr.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
cb->bind_attr.mr = cb->rdma_mr;
cb->bind_attr.bind_info.mw_access_flags = IB_ACCESS_REMOTE_WRITE;
cb->bind_attr.bind_info.mr = cb->rdma_mr;
}
cb->bind_attr.addr = buf;
cb->bind_attr.bind_info.addr = buf;
DEBUG_LOG(cb, "binding mw rkey 0x%x to buf %llx mr rkey 0x%x\n",
cb->mw->rkey, buf, cb->bind_attr.mr->rkey);
cb->mw->rkey, buf, cb->bind_attr.bind_info.mr->rkey);
ret = ib_bind_mw(cb->qp, cb->mw, &cb->bind_attr);
if (ret) {
PRINTF(cb, "bind mw error %d\n", ret);
@ -2304,7 +2304,7 @@ int krping_doit(char *cmd, void *cookie)
goto out;
}
cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP);
cb->cm_id = rdma_create_id(krping_cma_event_handler, cb, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cb->cm_id)) {
ret = PTR_ERR(cb->cm_id);
PRINTF(cb, "rdma_create_id error %d\n", ret);

View File

@ -176,7 +176,7 @@ iwch_destroy_cq(struct ib_cq *ib_cq)
}
static struct ib_cq *
iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
iwch_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
struct ib_ucontext *ib_context,
struct ib_udata *udata)
{
@ -187,6 +187,7 @@ iwch_create_cq(struct ib_device *ibdev, int entries, int vector,
struct iwch_ucontext *ucontext = NULL;
static int warned;
size_t resplen;
int entries = attr->cqe;
CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries);
rhp = to_iwch_dev(ibdev);
@ -545,16 +546,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int mr_id)
{
__be64 *pages;
int shift, i, n;
int shift, n, len;
int i, k, entry;
int err = 0;
struct ib_umem_chunk *chunk;
struct iwch_dev *rhp;
struct iwch_pd *php;
struct iwch_mr *mhp;
struct iwch_reg_user_mr_resp uresp;
#ifdef notyet
int j, k, len;
#endif
struct scatterlist *sg;
CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd);
@ -575,9 +574,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mhp->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
n = mhp->umem->nmap;
err = iwch_alloc_pbl(mhp, n);
if (err)
@ -591,7 +588,21 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
#ifdef notyet
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = iwch_write_pbl(mhp, pages, i, n);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
}
#if 0
TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
@ -612,9 +623,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (i)
err = iwch_write_pbl(mhp, pages, i, n);
#ifdef notyet
pbl_done:
#endif
cxfree(pages);
if (err)
goto err_pbl;
@ -672,7 +681,7 @@ static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
return ibmr;
}
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
struct iwch_pd *php;

View File

@ -551,18 +551,18 @@ int iwch_bind_mw(struct ib_qp *qp,
if (mw_bind->send_flags & IB_SEND_SIGNALED)
t3_wr_flags = T3_COMPLETION_FLAG;
sgl.addr = mw_bind->addr;
sgl.lkey = mw_bind->mr->lkey;
sgl.length = mw_bind->length;
sgl.addr = mw_bind->bind_info.addr;
sgl.lkey = mw_bind->bind_info.mr->lkey;
sgl.length = mw_bind->bind_info.length;
wqe->bind.reserved = 0;
wqe->bind.type = T3_VA_BASED_TO;
/* TBD: check perms */
wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags);
wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey);
wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->bind_info.mw_access_flags);
wqe->bind.mr_stag = htobe32(mw_bind->bind_info.mr->lkey);
wqe->bind.mw_stag = htobe32(mw->rkey);
wqe->bind.mw_len = htobe32(mw_bind->length);
wqe->bind.mw_va = htobe64(mw_bind->addr);
wqe->bind.mw_len = htobe32(mw_bind->bind_info.length);
wqe->bind.mw_va = htobe64(mw_bind->bind_info.addr);
err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
if (err) {
mtx_unlock(&qhp->lock);

View File

@ -775,7 +775,7 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq)
}
struct ib_cq *
c4iw_create_cq(struct ib_device *ibdev, int entries, int vector,
c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
struct ib_ucontext *ib_context, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
@ -785,6 +785,7 @@ c4iw_create_cq(struct ib_device *ibdev, int entries, int vector,
int ret;
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
int entries = attr->cqe;
CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);

View File

@ -864,7 +864,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
int page_list_len);
struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
int c4iw_dealloc_mw(struct ib_mw *mw);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd);
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64
virt, int acc, struct ib_udata *udata, int mr_id);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
@ -881,8 +881,7 @@ int c4iw_reregister_phys_mem(struct ib_mr *mr,
int acc, u64 *iova_start);
int c4iw_dereg_mr(struct ib_mr *ib_mr);
int c4iw_destroy_cq(struct ib_cq *ib_cq);
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
int vector,
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, struct ib_cq_init_attr *attr,
struct ib_ucontext *ib_context,
struct ib_udata *udata);
int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);

View File

@ -563,9 +563,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
__be64 *pages;
int shift, n, len;
int i, j, k;
int i, k, entry;
int err = 0;
struct ib_umem_chunk *chunk;
struct scatterlist *sg;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
@ -594,11 +594,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = ffs(mhp->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
n += chunk->nents;
n = mhp->umem->nmap;
err = alloc_pbl(mhp, n);
if (err)
goto err;
@ -610,25 +607,23 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
i = n = 0;
list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(
&chunk->page_list[j]) +
for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = cpu_to_be64(sg_dma_address(sg) +
mhp->umem->page_size * k);
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
mhp->attr.pbl_addr + (n << 3), i);
if (err)
goto pbl_done;
n += i;
i = 0;
}
if (i == PAGE_SIZE / sizeof *pages) {
err = write_pbl(&mhp->rhp->rdev,
pages,
mhp->attr.pbl_addr + (n << 3), i);
if (err)
goto pbl_done;
n += i;
i = 0;
}
}
}
if (i)
err = write_pbl(&mhp->rhp->rdev, pages,
@ -662,7 +657,7 @@ err:
return ERR_PTR(err);
}
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;

View File

@ -4,8 +4,8 @@
KMOD= ibcore
SRCS= addr.c iwcm.c sa_query.c ucma.c uverbs_cmd.c \
agent.c local_sa.c multicast.c smi.c ud_header.c uverbs_main.c \
mad.c notice.c umem.c uverbs_marshall.c \
agent.c multicast.c smi.c ud_header.c uverbs_main.c \
mad.c peer_mem.c umem.c uverbs_marshall.c \
cache.c device.c packer.c sysfs.c user_mad.c verbs.c \
cm.c fmr_pool.c mad_rmpp.c ucm.c cma.c \
vnode_if.h device_if.h bus_if.h pci_if.h \

View File

@ -6,6 +6,7 @@ KMOD= mlx4ib
SRCS= device_if.h bus_if.h vnode_if.h pci_if.h \
opt_inet.h opt_inet6.h \
alias_GUID.c mcg.c sysfs.c ah.c cq.c \
mlx4_exp.c \
doorbell.c mad.c main.c mr.c qp.c srq.c wc.c cm.c
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4

View File

@ -1,32 +0,0 @@
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
ib_cm.o iw_cm.o $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
device.o fmr_pool.o cache.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
ib_sa-y := sa_query.o multicast.o notice.o local_sa.o
ib_cm-y := cm.o
iw_cm-y := iwcm.o
rdma_cm-y := cma.o
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o

View File

@ -69,6 +69,7 @@ static LIST_HEAD(req_list);
static struct delayed_work work;
static struct workqueue_struct *addr_wq;
static struct rdma_addr_client self;
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
@ -89,19 +90,6 @@ void rdma_addr_unregister_client(struct rdma_addr_client *client)
}
EXPORT_SYMBOL(rdma_addr_unregister_client);
#ifdef __linux__
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
#else
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
const unsigned char *dst_dev_addr)
{
@ -119,10 +107,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
dev_addr->bound_dev_if = dev->if_index;
return 0;
}
#endif
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
@ -137,33 +125,21 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
}
switch (addr->sa_family) {
#ifdef INET
case AF_INET:
dev = ip_dev_find(NULL,
dev = ip_dev_find(&init_net,
((struct sockaddr_in *) addr)->sin_addr.s_addr);
if (!dev)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
#endif
#if defined(INET6)
case AF_INET6:
#ifdef __linux__
read_lock(&dev_base_lock);
for_each_netdev(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
break;
}
}
read_unlock(&dev_base_lock);
#else
{
struct sockaddr_in6 *sin6;
struct ifaddr *ifa;
@ -179,11 +155,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
break;
}
ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
ifa_free(ifa);
break;
}
#endif
break;
#endif
}
return ret;
@ -218,127 +194,6 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
#ifdef __linux__
static int addr4_resolve(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
struct flowi fl;
struct rtable *rt;
struct neighbour *neigh;
int ret;
memset(&fl, 0, sizeof fl);
fl.nl_u.ip4_u.daddr = dst_ip;
fl.nl_u.ip4_u.saddr = src_ip;
fl.oif = addr->bound_dev_if;
ret = ip_route_output_key(&init_net, &rt, &fl);
if (ret)
goto out;
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = rt->rt_src;
if (rt->idev->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (rt->idev->dev->flags & IFF_NOARP) {
rdma_copy_addr(addr, rt->idev->dev, NULL);
goto put;
}
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
if (!neigh || !(neigh->nud_state & NUD_VALID)) {
neigh_event_send(rt->u.dst.neighbour, NULL);
ret = -ENODATA;
if (neigh)
goto release;
goto put;
}
ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
release:
neigh_release(neigh);
put:
ip_rt_put(rt);
out:
return ret;
}
#if defined(INET6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
struct flowi fl;
struct neighbour *neigh;
struct dst_entry *dst;
int ret;
memset(&fl, 0, sizeof fl);
ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
fl.oif = addr->bound_dev_if;
dst = ip6_route_output(&init_net, NULL, &fl);
if ((ret = dst->error))
goto put;
if (ipv6_addr_any(&fl.fl6_src)) {
ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
&fl.fl6_dst, 0, &fl.fl6_src);
if (ret)
goto put;
src_in->sin6_family = AF_INET6;
ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
}
if (dst->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
goto put;
}
neigh = dst->neighbour;
if (!neigh || !(neigh->nud_state & NUD_VALID)) {
neigh_event_send(dst->neighbour, NULL);
ret = -ENODATA;
goto put;
}
ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
#else
#include <netinet/if_ether.h>
static int addr_resolve(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
@ -354,7 +209,6 @@ static int addr_resolve(struct sockaddr *src_in,
int bcast;
int is_gw = 0;
int error = 0;
/*
* Determine whether the address is unicast, multicast, or broadcast
* and whether the source interface is valid.
@ -382,8 +236,7 @@ static int addr_resolve(struct sockaddr *src_in,
port = sin->sin_port;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
} else
src_in = NULL;
}
break;
#endif
#ifdef INET6
@ -406,7 +259,7 @@ static int addr_resolve(struct sockaddr *src_in,
* If we have a source address to use look it up first and verify
* that it is a local interface.
*/
if (src_in) {
if (sin->sin_addr.s_addr != INADDR_ANY) {
ifa = ifa_ifwithaddr(src_in);
if (sin)
sin->sin_port = port;
@ -436,15 +289,20 @@ static int addr_resolve(struct sockaddr *src_in,
* correct interface pointer and unlock the route.
*/
if (multi || bcast) {
if (ifp == NULL)
if (ifp == NULL) {
ifp = rte->rt_ifp;
/* rt_ifa holds the route answer source address */
ifa = rte->rt_ifa;
}
RTFREE_LOCKED(rte);
} else if (ifp && ifp != rte->rt_ifp) {
RTFREE_LOCKED(rte);
return -ENETUNREACH;
} else {
if (ifp == NULL)
if (ifp == NULL) {
ifp = rte->rt_ifp;
ifa = rte->rt_ifa;
}
RT_UNLOCK(rte);
}
mcast:
@ -459,6 +317,8 @@ mcast:
error = rdma_copy_addr(addr, ifp,
LLADDR((struct sockaddr_dl *)llsa));
free(llsa, M_IFMADDR);
if (error == 0)
memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
return error;
}
/*
@ -472,7 +332,7 @@ mcast:
#endif
#ifdef INET6
case AF_INET6:
error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst,NULL);
error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, NULL);
break;
#endif
default:
@ -480,15 +340,15 @@ mcast:
error = -EINVAL;
}
RTFREE(rte);
if (error == 0)
if (error == 0) {
memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
return rdma_copy_addr(addr, ifp, edst);
}
if (error == EWOULDBLOCK)
return -ENODATA;
return -error;
}
#endif
static void process_req(struct work_struct *work)
{
struct addr_req *req, *temp_req;
@ -602,20 +462,94 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);
/*
 * State shared between rdma_addr_find_dmac_by_grh() and its resolve_cb()
 * callback: where to store the resolved device address, the status the
 * resolver reported, and the completion the caller blocks on.
 */
struct resolve_cb_context {
	struct rdma_dev_addr *addr;
	struct completion comp;
	int status;
};

/*
 * Callback handed to rdma_resolve_ip().  Records the reported status and,
 * only when it is zero, copies the resolved address to the waiter's buffer.
 * The context lives on the waiter's stack, so it must not be touched after
 * complete() has been called.
 */
static void resolve_cb(int status, struct sockaddr *src_addr,
	struct rdma_dev_addr *addr, void *context)
{
	struct resolve_cb_context *ctx = context;

	if (status == 0)
		memcpy(ctx->addr, addr, sizeof(*ctx->addr));
	ctx->status = status;
	complete(&ctx->comp);
}

/*
 * Resolve the destination MAC address (and optionally the VLAN id) for the
 * pair of GIDs @sgid / @dgid.
 *
 * Both GIDs are converted with rdma_gid2ip(), then rdma_resolve_ip() is
 * started and this function blocks until resolve_cb() signals completion.
 *
 * @dmac must have room for ETH_ALEN bytes; @vlan_id may be NULL.
 *
 * Returns 0 on success.  On failure returns the error from rdma_gid2ip()
 * or rdma_resolve_ip(), the status reported to the callback, or -ENODEV
 * when the bound interface index no longer maps to a device.
 */
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
	u16 *vlan_id)
{
	int ret = 0;
	struct rdma_dev_addr dev_addr;
	struct resolve_cb_context ctx;
	struct net_device *dev;
	union {
		struct sockaddr _sockaddr;
		struct sockaddr_in _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} sgid_addr, dgid_addr;

	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
	if (ret)
		return ret;

	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
	if (ret)
		return ret;

	memset(&dev_addr, 0, sizeof(dev_addr));

	ctx.addr = &dev_addr;
	ctx.status = 0;
	init_completion(&ctx.comp);
	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
	    &dev_addr, 1000, resolve_cb, &ctx);
	if (ret)
		return ret;

	wait_for_completion(&ctx.comp);

	/*
	 * The request was queued successfully, but the resolution itself may
	 * still have failed; do not hand back an unresolved address.
	 */
	ret = ctx.status;
	if (ret)
		return ret;

	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
	if (!dev)
		return -ENODEV;
	if (vlan_id)
		*vlan_id = rdma_vlan_dev_vlan_id(dev);
	dev_put(dev);
	return ret;
}
EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
/*
 * Look up the source MAC address for @sgid.
 *
 * The GID is converted with rdma_gid2ip() and the resulting socket address
 * is passed to rdma_translate_ip(), which fills in a zeroed rdma_dev_addr
 * and, when @vlan_id is not NULL, the VLAN id.  ETH_ALEN bytes of the
 * source device address are then copied to @smac.
 *
 * Returns 0 on success or the error returned by rdma_gid2ip() /
 * rdma_translate_ip().
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	struct rdma_dev_addr dev_addr;
	union {
		struct sockaddr _sockaddr;
		struct sockaddr_in _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;
	int err;

	err = rdma_gid2ip(&gid_addr._sockaddr, sgid);
	if (err != 0)
		return err;

	memset(&dev_addr, 0, sizeof(dev_addr));

	err = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (err != 0)
		return err;

	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
	return 0;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
if (event == NETEVENT_NEIGH_UPDATE) {
#ifdef __linux__
struct neighbour *neigh = ctx;
if (neigh->nud_state & NUD_VALID) {
set_timeout(jiffies);
}
#else
set_timeout(jiffies);
#endif
}
return 0;
}
@ -631,11 +565,13 @@ static int __init addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
rdma_addr_register_client(&self);
return 0;
}
static void __exit addr_cleanup(void)
{
rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}

View File

@ -76,19 +76,21 @@ int ib_get_cached_gid(struct ib_device *device,
{
struct ib_gid_cache *cache;
unsigned long flags;
int ret = 0;
int ret = -EINVAL;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.gid_cache[port_num - start_port(device)];
if (device->cache.gid_cache) {
cache = device->cache.gid_cache[port_num - start_port(device)];
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*gid = cache->table[index];
if (cache && index >= 0 && index < cache->table_len) {
*gid = cache->table[index];
ret = 0;
}
}
read_unlock_irqrestore(&device->cache.lock, flags);
@ -111,22 +113,24 @@ int ib_find_cached_gid(struct ib_device *device,
*index = -1;
read_lock_irqsave(&device->cache.lock, flags);
if (!device->cache.gid_cache)
goto out;
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
cache = device->cache.gid_cache[p];
if (!cache)
continue;
for (i = 0; i < cache->table_len; ++i) {
if (!memcmp(gid, &cache->table[i], sizeof *gid)) {
*port_num = p + start_port(device);
if (index)
*index = i;
ret = 0;
goto found;
goto out;
}
}
}
found:
out:
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_gid);
@ -138,19 +142,21 @@ int ib_get_cached_pkey(struct ib_device *device,
{
struct ib_pkey_cache *cache;
unsigned long flags;
int ret = 0;
int ret = -EINVAL;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (device->cache.pkey_cache) {
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
if (cache && index >= 0 && index < cache->table_len) {
*pkey = cache->table[index];
ret = 0;
}
}
read_unlock_irqrestore(&device->cache.lock, flags);
@ -167,41 +173,93 @@ int ib_find_cached_pkey(struct ib_device *device,
unsigned long flags;
int i;
int ret = -ENOENT;
int partial_ix = -1;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
*index = -1;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (!device->cache.pkey_cache)
goto out;
*index = -1;
cache = device->cache.pkey_cache[port_num - start_port(device)];
if (!cache)
goto out;
for (i = 0; i < cache->table_len; ++i)
if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
if (cache->table[i] & 0x8000) {
*index = i;
ret = 0;
break;
} else
partial_ix = i;
}
if (ret && partial_ix >= 0) {
*index = partial_ix;
ret = 0;
}
out:
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
/*
 * Search the cached P_Key table of @port_num for an entry equal to @pkey
 * (all 16 bits compared) and store its position in @index.
 *
 * @index is set to -1 (as a u16) before the search starts.
 *
 * Returns 0 when a matching entry is found, -EINVAL when @port_num is
 * outside the device's port range, and -ENOENT when the cache is absent
 * or holds no matching entry.
 */
int ib_find_exact_cached_pkey(struct ib_device *device,
	u8 port_num,
	u16 pkey,
	u16 *index)
{
	struct ib_pkey_cache *tbl;
	unsigned long irq_flags;
	int status = -ENOENT;
	int pos;

	if (port_num < start_port(device) || port_num > end_port(device))
		return -EINVAL;

	*index = -1;

	read_lock_irqsave(&device->cache.lock, irq_flags);

	if (device->cache.pkey_cache != NULL) {
		tbl = device->cache.pkey_cache[port_num - start_port(device)];
		if (tbl != NULL) {
			for (pos = 0; pos < tbl->table_len; ++pos) {
				if (tbl->table[pos] != pkey)
					continue;
				*index = pos;
				status = 0;
				break;
			}
		}
	}

	read_unlock_irqrestore(&device->cache.lock, irq_flags);

	return status;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
u8 port_num,
u8 *lmc)
{
unsigned long flags;
int ret = 0;
int ret = -EINVAL;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
*lmc = device->cache.lmc_cache[port_num - start_port(device)];
if (device->cache.lmc_cache) {
*lmc = device->cache.lmc_cache[port_num - start_port(device)];
ret = 0;
}
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
@ -217,6 +275,10 @@ static void ib_cache_update(struct ib_device *device,
int i;
int ret;
if (!(device->cache.pkey_cache && device->cache.gid_cache &&
device->cache.lmc_cache))
return;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
return;
@ -309,7 +371,7 @@ static void ib_cache_event(struct ib_event_handler *handler,
INIT_WORK(&work->work, ib_cache_task);
work->device = event->device;
work->port_num = event->element.port_num;
schedule_work(&work->work);
queue_work(ib_wq, &work->work);
}
}
}
@ -362,14 +424,21 @@ err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
kfree(device->cache.lmc_cache);
device->cache.pkey_cache = NULL;
device->cache.gid_cache = NULL;
device->cache.lmc_cache = NULL;
}
static void ib_cache_cleanup_one(struct ib_device *device)
{
int p;
if (!(device->cache.pkey_cache && device->cache.gid_cache &&
device->cache.lmc_cache))
return;
ib_unregister_event_handler(&device->cache.event_handler);
flush_scheduled_work();
flush_workqueue(ib_wq);
for (p = 0; p <= end_port(device) - start_port(device); ++p) {
kfree(device->cache.pkey_cache[p]);

View File

@ -36,16 +36,19 @@
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/string.h>
#include <linux/etherdevice.h>
#include <asm/atomic-long.h>
@ -57,16 +60,10 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
#define PFX "ib_cm: "
/*
* Limit CM message timeouts to something reasonable:
* 8 seconds per message, with up to 15 retries
*/
static int max_timeout = 21;
module_param(max_timeout, int, 0644);
MODULE_PARM_DESC(max_timeout, "Maximum IB CM per message timeout "
"(default=21, or ~8 seconds)");
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device);
@ -189,6 +186,8 @@ struct cm_av {
struct ib_ah_attr ah_attr;
u16 pkey_index;
u8 timeout;
u8 valid;
u8 smac[ETH_ALEN];
};
struct cm_work {
@ -358,6 +357,23 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
grh, &av->ah_attr);
}
/*
 * Store source MAC addresses in the address vectors of a CM id.
 *
 * @smac, when not NULL, is copied into the primary address vector;
 * @alt_smac, when not NULL, is copied into the alternate one.  Each copy
 * is sized by the destination smac array.  Always returns 0.
 */
int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
{
	struct cm_id_private *priv = container_of(id, struct cm_id_private, id);

	if (smac)
		memcpy(priv->av.smac, smac, sizeof(priv->av.smac));

	if (alt_smac)
		memcpy(priv->alt_av.smac, alt_smac, sizeof(priv->alt_av.smac));

	return 0;
}
EXPORT_SYMBOL(ib_update_cm_av);
static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
{
struct cm_device *cm_dev;
@ -388,6 +404,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
av->timeout = path->packet_life_time + 1;
memcpy(av->smac, path->smac, sizeof(av->smac));
av->valid = 1;
return 0;
}
@ -402,7 +421,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
next_id, &id);
if (!ret)
next_id = ((unsigned) id + 1) & MAX_ID_MASK;
next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
spin_unlock_irqrestore(&cm.lock, flags);
} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
@ -794,11 +813,11 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
}
}
static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id, gfp_t flags)
{
struct cm_timewait_info *timewait_info;
timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
timewait_info = kzalloc(sizeof *timewait_info, flags);
if (!timewait_info)
return ERR_PTR(-ENOMEM);
@ -902,6 +921,8 @@ retest:
break;
case IB_CM_ESTABLISHED:
spin_unlock_irq(&cm_id_priv->lock);
if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
break;
ib_send_cm_dreq(cm_id, NULL, 0);
goto retest;
case IB_CM_DREQ_SENT:
@ -1021,33 +1042,24 @@ static void cm_format_req(struct cm_req_msg *req_msg,
req_msg->service_id = param->service_id;
req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
cm_req_set_resp_res(req_msg, param->responder_resources);
cm_req_set_init_depth(req_msg, param->initiator_depth);
cm_req_set_remote_resp_timeout(req_msg,
param->remote_cm_response_timeout);
if (param->remote_cm_response_timeout > (u8) max_timeout) {
printk(KERN_WARNING PFX "req remote_cm_response_timeout %d > "
"%d, decreasing\n", param->remote_cm_response_timeout,
max_timeout);
cm_req_set_remote_resp_timeout(req_msg, (u8) max_timeout);
}
cm_req_set_qp_type(req_msg, param->qp_type);
cm_req_set_flow_ctrl(req_msg, param->flow_control);
cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
cm_req_set_local_resp_timeout(req_msg,
param->local_cm_response_timeout);
if (param->local_cm_response_timeout > (u8) max_timeout) {
printk(KERN_WARNING PFX "req local_cm_response_timeout %d > "
"%d, decreasing\n", param->local_cm_response_timeout,
max_timeout);
cm_req_set_local_resp_timeout(req_msg, (u8) max_timeout);
}
cm_req_set_retry_count(req_msg, param->retry_count);
req_msg->pkey = param->primary_path->pkey;
cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
if (param->qp_type != IB_QPT_XRC_INI) {
cm_req_set_resp_res(req_msg, param->responder_resources);
cm_req_set_retry_count(req_msg, param->retry_count);
cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
cm_req_set_srq(req_msg, param->srq);
}
if (pri_path->hop_limit <= 1) {
req_msg->primary_local_lid = pri_path->slid;
@ -1105,7 +1117,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
if (!param->primary_path)
return -EINVAL;
if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
param->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
if (param->private_data &&
@ -1137,38 +1150,34 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_IDLE) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = -EINVAL;
goto out;
return -EINVAL;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
id.local_id);
id.local_id,
GFP_ATOMIC);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
goto out;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return (PTR_ERR(cm_id_priv->timewait_info));
}
ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
if (ret)
goto error1;
if (param->alternate_path) {
if (!ret && param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path,
&cm_id_priv->alt_av);
if (ret)
}
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
goto error1;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_id->service_id = param->service_id;
cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = cm_convert_to_ms(
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
param->remote_cm_response_timeout);
if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
printk(KERN_WARNING PFX "req timeout_ms %d > %d, decreasing\n",
cm_id_priv->timeout_ms, cm_convert_to_ms(max_timeout));
cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
}
cm_id_priv->max_cm_retries = param->max_cm_retries;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
@ -1201,9 +1210,11 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
error2: cm_free_msg(cm_id_priv->msg);
error1: kfree(cm_id_priv->timewait_info);
out: return ret;
error2:
cm_free_msg(cm_id_priv->msg);
error1:
kfree(cm_id_priv->timewait_info);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
@ -1556,7 +1567,8 @@ static int cm_req_handler(struct cm_work *work)
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
id.local_id);
id.local_id,
GFP_KERNEL);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
goto destroy;
@ -1579,6 +1591,10 @@ static int cm_req_handler(struct cm_work *work)
cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
/* Workaround: path in req_msg doesn't contain MAC, take it from wc */
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, 6);
work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
@ -1600,13 +1616,6 @@ static int cm_req_handler(struct cm_work *work)
cm_id_priv->tid = req_msg->hdr.tid;
cm_id_priv->timeout_ms = cm_convert_to_ms(
cm_req_get_local_resp_timeout(req_msg));
if (cm_req_get_local_resp_timeout(req_msg) > (u8) max_timeout) {
printk(KERN_WARNING PFX "rcvd cm_local_resp_timeout %d > %d, "
"decreasing used timeout_ms\n",
cm_req_get_local_resp_timeout(req_msg), max_timeout);
cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
}
cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
@ -1638,18 +1647,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
rep_msg->local_comm_id = cm_id_priv->id.local_id;
rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
rep_msg->resp_resources = param->responder_resources;
rep_msg->initiator_depth = param->initiator_depth;
cm_rep_set_target_ack_delay(rep_msg,
cm_id_priv->av.port->cm_dev->ack_delay);
cm_rep_set_failover(rep_msg, param->failover_accepted);
cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
cm_rep_set_srq(rep_msg, param->srq);
rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
rep_msg->initiator_depth = param->initiator_depth;
cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
cm_rep_set_srq(rep_msg, param->srq);
cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
} else {
cm_rep_set_srq(rep_msg, 1);
cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
}
if (param->private_data && param->private_data_len)
memcpy(rep_msg->private_data, param->private_data,
param->private_data_len);
@ -1672,6 +1687,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto out;
}
@ -1697,7 +1713,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
@ -1738,6 +1754,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto error;
}
@ -1768,7 +1785,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
EXPORT_SYMBOL(ib_send_cm_rtu);
static void cm_format_rep_event(struct cm_work *work)
static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
{
struct cm_rep_msg *rep_msg;
struct ib_cm_rep_event_param *param;
@ -1777,7 +1794,7 @@ static void cm_format_rep_event(struct cm_work *work)
param = &work->cm_event.param.rep_rcvd;
param->remote_ca_guid = rep_msg->local_ca_guid;
param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
param->responder_resources = rep_msg->initiator_depth;
param->initiator_depth = rep_msg->resp_resources;
@ -1842,10 +1859,11 @@ static int cm_rep_handler(struct cm_work *work)
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
pr_debug("no cm_id_priv\n");
return -EINVAL;
}
cm_format_rep_event(work);
cm_format_rep_event(work, cm_id_priv->qp_type);
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
@ -1855,12 +1873,13 @@ static int cm_rep_handler(struct cm_work *work)
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state);
goto error;
}
cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
spin_lock(&cm.lock);
/* Check for duplicate REP. */
@ -1868,6 +1887,7 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
pr_debug("Failed to insert remote id\n");
goto error;
}
/* Check for a stale connection. */
@ -1881,13 +1901,14 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
pr_debug("Stale connection.\n");
goto error;
}
spin_unlock(&cm.lock);
cm_id_priv->id.state = IB_CM_REP_RCVD;
cm_id_priv->id.remote_id = rep_msg->local_comm_id;
cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
cm_id_priv->initiator_depth = rep_msg->resp_resources;
cm_id_priv->responder_resources = rep_msg->initiator_depth;
cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
@ -2021,10 +2042,15 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_ESTABLISHED) {
pr_debug("cm_id->state: %d\n", cm_id->state);
ret = -EINVAL;
goto out;
}
if (cm_id->lap_state == IB_CM_LAP_SENT ||
cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret) {
cm_enter_timewait(cm_id_priv);
@ -2086,6 +2112,7 @@ int ib_send_cm_drep(struct ib_cm_id *cm_id,
if (cm_id->state != IB_CM_DREQ_RCVD) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
kfree(data);
pr_debug("cm_id->state(%d) != IB_CM_DREQ_RCVD\n", cm_id->state);
return -EINVAL;
}
@ -2151,6 +2178,7 @@ static int cm_dreq_handler(struct cm_work *work)
atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
counter[CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
pr_debug("no cm_id_priv\n");
return -EINVAL;
}
@ -2166,6 +2194,10 @@ static int cm_dreq_handler(struct cm_work *work)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_MRA_REP_RCVD:
break;
case IB_CM_TIMEWAIT:
@ -2187,6 +2219,7 @@ static int cm_dreq_handler(struct cm_work *work)
counter[CM_DREQ_COUNTER]);
goto unlock;
default:
pr_debug("cm_id_priv->id.state: %d\n", cm_id_priv->id.state);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
@ -2290,6 +2323,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
cm_enter_timewait(cm_id_priv);
break;
default:
pr_debug("cm_id->state: 0x%x\n", cm_id->state);
ret = -EINVAL;
goto out;
}
@ -2386,11 +2420,21 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
case IB_CM_ESTABLISHED:
cm_enter_timewait(cm_id_priv);
break;
case IB_CM_ESTABLISHED:
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
ib_cancel_mad(cm_id_priv->av.port->mad_agent,
cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
break;
}
/* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
goto out;
}
@ -2453,6 +2497,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id,
break;
}
default:
pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
goto error1;
}
@ -2518,12 +2563,6 @@ static int cm_mra_handler(struct cm_work *work)
cm_mra_get_service_timeout(mra_msg);
timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
cm_convert_to_ms(cm_id_priv->av.timeout);
if (timeout > cm_convert_to_ms(max_timeout)) {
printk(KERN_WARNING PFX "calculated mra timeout %d > %d, "
"decreasing used timeout_ms\n", timeout,
cm_convert_to_ms(max_timeout));
timeout = cm_convert_to_ms(max_timeout);
}
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
@ -2560,6 +2599,7 @@ static int cm_mra_handler(struct cm_work *work)
counter[CM_MRA_COUNTER]);
/* fall through */
default:
pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
goto out;
}
@ -2746,7 +2786,8 @@ static int cm_lap_handler(struct cm_work *work)
cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
if (cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av))
goto unlock;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
@ -2938,6 +2979,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
if (ret)
goto out;
@ -2945,12 +2989,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
cm_id->service_id = param->service_id;
cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = param->timeout_ms;
if (cm_id_priv->timeout_ms > cm_convert_to_ms(max_timeout)) {
printk(KERN_WARNING PFX "sidr req timeout_ms %d > %d, "
"decreasing used timeout_ms\n", param->timeout_ms,
cm_convert_to_ms(max_timeout));
cm_id_priv->timeout_ms = cm_convert_to_ms(max_timeout);
}
cm_id_priv->max_cm_retries = param->max_cm_retries;
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret)
@ -2961,21 +2999,19 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
msg->timeout_ms = cm_id_priv->timeout_ms;
msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state == IB_CM_IDLE)
ret = ib_post_send_mad(msg, NULL);
else
ret = -EINVAL;
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
goto out;
}
cm_id->state = IB_CM_SIDR_REQ_SENT;
cm_id_priv->msg = msg;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
out:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);
@ -3038,6 +3074,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
goto out; /* No match. */
}
atomic_inc(&cur_cm_id_priv->refcount);
atomic_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
@ -3302,6 +3339,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
pr_debug("work->cm_event.event: 0x%x\n", work->cm_event.event);
ret = -EINVAL;
break;
}
@ -3332,6 +3370,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
pr_debug("cm_id->state: 0x%x\n", cm_id->state);
ret = -EINVAL;
break;
}
@ -3494,6 +3533,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@ -3520,10 +3560,36 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
if (!cm_id_priv->av.valid)
return -EINVAL;
if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
*qp_attr_mask |= IB_QP_VID;
}
if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
memcpy(qp_attr->smac, cm_id_priv->av.smac,
sizeof(qp_attr->smac));
*qp_attr_mask |= IB_QP_SMAC;
}
if (cm_id_priv->alt_av.valid) {
if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
qp_attr->alt_vlan_id =
cm_id_priv->alt_av.ah_attr.vlan_id;
*qp_attr_mask |= IB_QP_ALT_VID;
}
if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
memcpy(qp_attr->alt_smac,
cm_id_priv->alt_av.smac,
sizeof(qp_attr->alt_smac));
*qp_attr_mask |= IB_QP_ALT_SMAC;
}
}
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
if (cm_id_priv->qp_type == IB_QPT_RC) {
if (cm_id_priv->qp_type == IB_QPT_RC ||
cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER;
qp_attr->max_dest_rd_atomic =
@ -3540,6 +3606,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@ -3568,15 +3635,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
if (cm_id_priv->qp_type == IB_QPT_RC) {
*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
switch (cm_id_priv->qp_type) {
case IB_QPT_RC:
case IB_QPT_XRC_INI:
*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC;
qp_attr->timeout = cm_id_priv->av.timeout;
qp_attr->retry_cnt = cm_id_priv->retry_count;
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
qp_attr->max_rd_atomic =
cm_id_priv->initiator_depth;
qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
/* fall through */
case IB_QPT_XRC_TGT:
*qp_attr_mask |= IB_QP_TIMEOUT;
qp_attr->timeout = cm_id_priv->av.timeout;
break;
default:
break;
}
if (cm_id_priv->alt_av.ah_attr.dlid) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
@ -3593,6 +3666,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
pr_debug("cm_id_priv->id.state: 0x%x\n", cm_id_priv->id.state);
ret = -EINVAL;
break;
}
@ -3619,6 +3693,7 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
break;
default:
pr_debug("qp_attr->qp_state: 0x%x\n", qp_attr->qp_state);
ret = -EINVAL;
break;
}
@ -3649,7 +3724,7 @@ static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
atomic_long_read(&group->counter[cm_attr->index]));
}
static struct sysfs_ops cm_counter_ops = {
static const struct sysfs_ops cm_counter_ops = {
.show = cm_show_counter
};
@ -3670,8 +3745,17 @@ static struct kobj_type cm_port_obj_type = {
.release = cm_release_port_obj
};
/*
 * devnode callback installed on cm_class.
 *
 * When @mode is non-NULL it is set to 0666.  The returned string is
 * "infiniband/<dev_name(dev)>" as produced by kasprintf(GFP_KERNEL, ...);
 * whatever kasprintf() returns (including a failed allocation) is passed
 * straight back to the caller.
 */
static char *cm_devnode(struct device *dev, umode_t *mode)
{
	char *node_path;

	if (mode)
		*mode = 0666;

	node_path = kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
	return node_path;
}
/*
 * Device class named "infiniband_cm".  It is exported right after this
 * definition and passed to device_create() in cm_add_one(); device node
 * names for its devices are built by cm_devnode().
 */
struct class cm_class = {
	.owner = THIS_MODULE,
	.name = "infiniband_cm",
	.devnode = cm_devnode,	/* yields "infiniband/<device name>" */
};
EXPORT_SYMBOL(cm_class);
@ -3745,7 +3829,7 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
"%s", ib_device->name);
if (!cm_dev->device) {
if (IS_ERR(cm_dev->device)) {
kfree(cm_dev);
return;
}
@ -3846,28 +3930,33 @@ static int __init ib_cm_init(void)
cm.remote_sidr_table = RB_ROOT;
idr_init(&cm.local_id_table);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
idr_pre_get(&cm.local_id_table, GFP_KERNEL);
if (!idr_pre_get(&cm.local_id_table, GFP_KERNEL))
return -ENOMEM;
INIT_LIST_HEAD(&cm.timewait_list);
ret = class_register(&cm_class);
if (ret)
return -ENOMEM;
cm.wq = create_workqueue("ib_cm");
if (!cm.wq) {
if (ret) {
ret = -ENOMEM;
goto error1;
}
cm.wq = create_workqueue("ib_cm");
if (!cm.wq) {
ret = -ENOMEM;
goto error2;
}
ret = ib_register_client(&cm_client);
if (ret)
goto error2;
goto error3;
return 0;
error2:
error3:
destroy_workqueue(cm.wq);
error1:
error2:
class_unregister(&cm_class);
error1:
idr_destroy(&cm.local_id_table);
return ret;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004, 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
*
@ -44,18 +44,6 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
enum cm_msg_sequence {
CM_MSG_SEQUENCE_REQ,
CM_MSG_SEQUENCE_LAP,
@ -86,7 +74,7 @@ struct cm_req_msg {
__be16 pkey;
/* path MTU:4, RDC exists:1, RNR retry count:3. */
u8 offset50;
/* max CM Retries:4, SRQ:1, rsvd:3 */
/* max CM Retries:4, SRQ:1, extended transport type:3 */
u8 offset51;
__be16 primary_local_lid;
@ -175,6 +163,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
switch(transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
case 3:
switch (req_msg->offset51 & 0x7) {
case 1: return IB_QPT_XRC_TGT;
default: return 0;
}
default: return 0;
}
}
@ -188,6 +181,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
req_msg->offset40) &
0xFFFFFFF9) | 0x2);
break;
case IB_QPT_XRC_INI:
req_msg->offset40 = cpu_to_be32((be32_to_cpu(
req_msg->offset40) &
0xFFFFFFF9) | 0x6);
req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
break;
default:
req_msg->offset40 = cpu_to_be32(be32_to_cpu(
req_msg->offset40) &
@ -527,6 +526,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
(be32_to_cpu(rep_msg->offset12) & 0x000000FF));
}
/*
 * Read the local EECN from a REP message: the upper 24 bits of the
 * big-endian word at offset16.  The result is returned in big-endian
 * form, already shifted down by 8 bits.
 */
static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
{
	uint32_t host_word = be32_to_cpu(rep_msg->offset16);

	return cpu_to_be32(host_word >> 8);
}
/*
 * Store @eecn (big-endian) into the upper 24 bits of the word at offset16
 * of a REP message, leaving the low 8 bits of that word untouched.
 */
static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
{
	rep_msg->offset16 = cpu_to_be32(
	    (be32_to_cpu(rep_msg->offset16) & 0x000000FF) |
	    (be32_to_cpu(eecn) << 8));
}
/*
 * Return the remote queue-pair number carried by a REP message.
 *
 * For an IB_QPT_XRC_INI local QP the value is taken from the EECN field;
 * for every other QP type it is taken from the local QPN field.
 */
static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
{
	if (qp_type == IB_QPT_XRC_INI)
		return cm_rep_get_local_eecn(rep_msg);

	return cm_rep_get_local_qpn(rep_msg);
}
static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
{
return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
@ -771,6 +787,7 @@ struct cm_apr_msg {
u8 info_length;
u8 ap_status;
__be16 rsvd;
u8 info[IB_CM_APR_INFO_LENGTH];
u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];

File diff suppressed because it is too large Load Diff

View File

@ -38,7 +38,8 @@
#include <rdma/ib_verbs.h>
int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *,
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);

View File

@ -37,7 +37,6 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include "core_priv.h"
@ -45,18 +44,15 @@ MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");
#ifdef __ia64__
/* workaround for a bug in hp chipset that would cause kernel
panic when dma resources are exhaused */
int dma_map_sg_hp_wa = 0;
#endif
/*
 * Pairs an ib_client with an opaque data pointer.
 * NOTE(review): presumably linked on a device's client_data_list (that
 * list head is initialised in ib_register_device()); the code that adds
 * and looks up entries is outside this view -- confirm.
 */
struct ib_client_data {
	struct list_head list;		/* list linkage */
	struct ib_client *client;	/* client this entry belongs to */
	void * data;			/* opaque pointer stored for the client */
};
/*
 * Workqueue created as "infiniband" in ib_core_init() and destroyed in
 * ib_core_cleanup().  Exported (GPL-only) for use outside this file.
 */
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
static LIST_HEAD(device_list);
static LIST_HEAD(client_list);
@ -99,7 +95,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
int i;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
if (!*(void **) ((u_char *) device + mandatory_table[i].offset)) {
if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
device->name, mandatory_table[i].name);
return -EINVAL;
@ -177,9 +173,14 @@ static int end_port(struct ib_device *device)
*/
struct ib_device *ib_alloc_device(size_t size)
{
	struct ib_device *dev;

	/* Callers embed struct ib_device at the start of a larger object. */
	BUG_ON(size < sizeof (struct ib_device));

	dev = kzalloc(size, GFP_KERNEL);

	/*
	 * kzalloc() may return NULL; only initialise the lock when the
	 * allocation succeeded.  On failure NULL is handed back to the
	 * caller unchanged.
	 */
	if (dev)
		spin_lock_init(&dev->cmd_perf_lock);

	return dev;
}
EXPORT_SYMBOL(ib_alloc_device);
@ -295,8 +296,6 @@ int ib_register_device(struct ib_device *device,
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
device->ib_uverbs_xrcd_table = RB_ROOT;
mutex_init(&device->xrcd_table_mutex);
ret = read_port_table_lengths(device);
if (ret) {
@ -631,6 +630,9 @@ int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
if (!device->modify_device)
return -ENOSYS;
return device->modify_device(device, device_modify_mask,
device_modify);
}
@ -651,6 +653,9 @@ int ib_modify_port(struct ib_device *device,
u8 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
if (!device->modify_port)
return -ENOSYS;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
@ -705,18 +710,28 @@ int ib_find_pkey(struct ib_device *device,
{
int ret, i;
u16 tmp_pkey;
int partial_ix = -1;
for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
*index = i;
return 0;
/* If there is a full-member pkey, take it. */
if (tmp_pkey & 0x8000) {
*index = i;
return 0;
}
if (partial_ix < 0)
partial_ix = i;
}
}
/* No full-member pkey found; if a limited-member one exists, take it. */
if (partial_ix >= 0) {
*index = partial_ix;
return 0;
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);
@ -725,21 +740,29 @@ static int __init ib_core_init(void)
{
int ret;
#ifdef __ia64__
if (ia64_platform_is("hpzx1"))
dma_map_sg_hp_wa = 1;
#endif
ib_wq = create_workqueue("infiniband");
if (!ib_wq)
return -ENOMEM;
ret = ib_sysfs_setup();
if (ret)
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
goto err;
}
ret = ib_cache_setup();
if (ret) {
printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
ib_sysfs_cleanup();
goto err_sysfs;
}
return 0;
err_sysfs:
ib_sysfs_cleanup();
err:
destroy_workqueue(ib_wq);
return ret;
}
@ -748,7 +771,7 @@ static void __exit ib_core_cleanup(void)
ib_cache_cleanup();
ib_sysfs_cleanup();
/* Make sure that any pending umem accounting work is done. */
flush_scheduled_work();
destroy_workqueue(ib_wq);
}
module_init(ib_core_init);

View File

@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <linux/kthread.h>
@ -150,7 +151,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
#ifdef DEBUG
if (fmr->ref_count !=0) {
printk(KERN_WARNING PFX "Unmapping FMR %p with ref count %d\n",
printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",
fmr, fmr->ref_count);
}
#endif

View File

@ -40,9 +40,12 @@
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
#include <rdma/iw_cm.h>
@ -507,6 +510,8 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
cm_id->device->iwcm->add_ref(qp);
@ -566,6 +571,8 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
cm_id->device->iwcm->add_ref(qp);
@ -620,17 +627,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
*/
BUG_ON(iw_event->status);
/*
* We could be destroying the listening id. If so, ignore this
* upcall.
*/
spin_lock_irqsave(&listen_id_priv->lock, flags);
if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
spin_unlock_irqrestore(&listen_id_priv->lock, flags);
goto out;
}
spin_unlock_irqrestore(&listen_id_priv->lock, flags);
cm_id = iw_create_cm_id(listen_id_priv->id.device,
iw_event->so,
listen_id_priv->id.cm_handler,
@ -646,6 +642,19 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
cm_id_priv->state = IW_CM_STATE_CONN_RECV;
/*
* We could be destroying the listening id. If so, ignore this
* upcall.
*/
spin_lock_irqsave(&listen_id_priv->lock, flags);
if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
spin_unlock_irqrestore(&listen_id_priv->lock, flags);
iw_cm_reject(cm_id, NULL, 0);
iw_destroy_cm_id(cm_id);
goto out;
}
spin_unlock_irqrestore(&listen_id_priv->lock, flags);
ret = alloc_work_entries(cm_id_priv, 3);
if (ret) {
iw_cm_reject(cm_id, NULL, 0);
@ -723,7 +732,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
*/
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
if (iw_event->status == 0) {
cm_id_priv->id.local_addr = iw_event->local_addr;
cm_id_priv->id.remote_addr = iw_event->remote_addr;
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -102,8 +102,7 @@ struct ib_mad_agent_private {
struct list_head send_list;
struct list_head wait_list;
struct list_head done_list;
struct work_struct timeout_work;
struct timer_list timeout_timer;
struct delayed_work timed_work;
unsigned long timeout;
struct list_head local_list;
struct work_struct local_work;
@ -122,6 +121,14 @@ struct ib_mad_snoop_private {
struct completion comp;
};
/*
 * Structure for timeout-fifo entry.
 *
 * Each entry carries two list links, so it can sit on a fifo-ordered list
 * and a timeout-ordered list at the same time.  An instance is embedded in
 * struct ib_mad_send_wr_private as the tf_list member.
 */
struct tf_entry {
	unsigned long exp_time; /* entry expiration time */
	struct list_head fifo_list; /* to keep entries in fifo order */
	struct list_head to_list; /* to keep entries in timeout order */
	int canceled; /* indicates whether entry is canceled */
};
struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
@ -147,6 +154,10 @@ struct ib_mad_send_wr_private {
int seg_num;
int newwin;
int pad;
/* SA congestion controlled MAD */
int is_sa_cc_mad;
struct tf_entry tf_list;
};
struct ib_mad_local_private {
@ -198,6 +209,25 @@ struct ib_mad_qp_info {
atomic_t snoop_count;
};
/*
 * Timeout-fifo container, referenced from struct sa_cc_data through its
 * tf pointer.
 * NOTE(review): the code operating on this structure is not visible here;
 * the per-field notes below are inferred from the names and from
 * struct tf_entry and should be confirmed against the implementation.
 */
struct to_fifo {
	struct list_head to_head;	/* presumably heads tf_entry.to_list (timeout order) -- confirm */
	struct list_head fifo_head;	/* presumably heads tf_entry.fifo_list (fifo order) -- confirm */
	spinlock_t lists_lock;		/* presumably guards both lists -- confirm */
	struct timer_list timer;
	struct work_struct work;
	u32 fifo_size;			/* presumably capacity -- confirm */
	u32 num_items;			/* presumably current entry count -- confirm */
	int stop_enqueue;		/* presumably set to refuse new entries -- confirm */
	struct workqueue_struct *workq;
};
/*
 * SA congestion control data.
 *
 * One instance is embedded in struct ib_mad_port_private as sa_cc.
 */
struct sa_cc_data {
	spinlock_t lock;
	unsigned long outstanding;	/* presumably count of SA MADs in flight -- TODO confirm */
	struct to_fifo *tf;		/* timeout-fifo used by this instance */
};
struct ib_mad_port_private {
struct list_head port_list;
struct ib_device *device;
@ -212,6 +242,7 @@ struct ib_mad_port_private {
struct workqueue_struct *wq;
struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
struct sa_cc_data sa_cc;
};
int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);

View File

@ -31,6 +31,8 @@
* SOFTWARE.
*/
#include <linux/slab.h>
#include "mad_priv.h"
#include "mad_rmpp.h"

View File

@ -34,12 +34,27 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/random.h>
#include <linux/moduleparam.h>
#include <linux/rbtree.h>
#include <rdma/ib_cache.h>
#include "sa.h"
/*
 * Number of retries for multicast leave requests before giving up
 * (default: 3).  acquire_group() copies this into group->retries.  The
 * module-parameter registration that follows is commented out.
 */
static int mcast_leave_retries = 3;
/*static const struct kernel_param_ops retry_ops = {
.set = param_set_int,
.get = param_get_int,
};
module_param_cb(mcast_leave_retries, &retry_ops, &mcast_leave_retries, 0644);
MODULE_PARM_DESC(mcast_leave_retries, "Number of retries for multicast leave "
"requests before giving up (default: 3)");
*/
static void mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device);
@ -250,6 +265,34 @@ static u8 get_leave_state(struct mcast_group *group)
return leave_state & group->rec.join_state;
}
/*
 * Test @src_value against @dst_value under an SA selector.
 *
 * The check is skipped (0 is returned) unless @comp_mask has bits in
 * common with both @selector_mask and @value_mask.  Otherwise the return
 * value is non-zero when the constraint is violated:
 *   IB_SA_GT: violated when src_value <= dst_value
 *   IB_SA_LT: violated when src_value >= dst_value
 *   IB_SA_EQ: violated when src_value != dst_value
 * Any other selector value is treated as "no constraint" and yields 0.
 */
static int check_selector(ib_sa_comp_mask comp_mask,
			  ib_sa_comp_mask selector_mask,
			  ib_sa_comp_mask value_mask,
			  u8 selector, u8 src_value, u8 dst_value)
{
	/* Nothing to enforce unless both selector and value are present. */
	if ((comp_mask & selector_mask) == 0 || (comp_mask & value_mask) == 0)
		return 0;

	if (selector == IB_SA_GT)
		return (src_value <= dst_value);
	if (selector == IB_SA_LT)
		return (src_value >= dst_value);
	if (selector == IB_SA_EQ)
		return (src_value != dst_value);

	return 0;
}
static int cmp_rec(struct ib_sa_mcmember_rec *src,
struct ib_sa_mcmember_rec *dst, ib_sa_comp_mask comp_mask)
{
@ -262,7 +305,7 @@ static int cmp_rec(struct ib_sa_mcmember_rec *src,
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
return -EINVAL;
if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
IB_SA_MCMEMBER_REC_MTU, dst->mtu_selector,
src->mtu, dst->mtu))
return -EINVAL;
@ -271,11 +314,11 @@ static int cmp_rec(struct ib_sa_mcmember_rec *src,
return -EINVAL;
if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
return -EINVAL;
if (ib_sa_check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
IB_SA_MCMEMBER_REC_RATE, dst->rate_selector,
src->rate, dst->rate))
return -EINVAL;
if (ib_sa_check_selector(comp_mask,
if (check_selector(comp_mask,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
dst->packet_life_time_selector,
@ -517,11 +560,15 @@ static void leave_handler(int status, struct ib_sa_mcmember_rec *rec,
{
struct mcast_group *group = context;
if (status && (group->retries > 0) &&
if (status && group->retries > 0 &&
!send_leave(group, group->leave_state))
group->retries--;
else
else {
if (status && group->retries <= 0)
printk(KERN_WARNING "reached max retry count. "
"status=%d. Giving up\n", status);
mcast_work_handler(&group->work);
}
}
static struct mcast_group *acquire_group(struct mcast_port *port,
@ -544,7 +591,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
if (!group)
return NULL;
group->retries = 3;
group->retries = mcast_leave_retries;
group->port = port;
group->rec.mgid = *mgid;
group->pkey_index = MCAST_INVALID_PKEY_INDEX;
@ -754,7 +801,6 @@ static void mcast_event_handler(struct ib_event_handler *handler,
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
break;

View File

@ -1,749 +0,0 @@
/*
* Copyright (c) 2006 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/random.h>
#include "sa.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
MODULE_LICENSE("Dual BSD/GPL");
static void inform_add_one(struct ib_device *device);
static void inform_remove_one(struct ib_device *device);
/*
 * Client descriptor passed to ib_register_client() by notice_init(); it
 * names the per-device add and remove callbacks of this module.
 */
static struct ib_client inform_client = {
.name = "ib_notice",
.add = inform_add_one,
.remove = inform_remove_one
};
static struct ib_sa_client sa_client;
static struct workqueue_struct *inform_wq;
struct inform_device;
/* Per-port state: the set of inform groups registered on one port. */
struct inform_port {
struct inform_device *dev; /* owning device, set in inform_add_one() */
spinlock_t lock; /* held around lookups, inserts and erases on table */
struct rb_root table; /* inform_group nodes ordered by trap_number */
atomic_t refcount; /* starts at 1; +1 for every group inserted in table */
struct completion comp; /* completed when refcount drops to zero */
u8 port_num; /* start_port + index into inform_device.port[] */
};
/*
 * Per-IB-device state, stored as this client's data on the ib_device.
 * Allocated in inform_add_one() with room for phys_port_cnt trailing ports.
 */
struct inform_device {
struct ib_device *device;
struct ib_event_handler event_handler; /* runs inform_event_handler() */
int start_port; /* 0 for a switch, otherwise 1 */
int end_port; /* 0 for a switch, otherwise phys_port_cnt */
struct inform_port port[0]; /* indexed by port_num - start_port */
};
/*
 * State values shared by three fields: inform_group.state (work-handler
 * scheduling), inform_group.join_state (registration with the SA) and
 * inform_member.state.
 */
enum inform_state {
INFORM_IDLE, /* group: no work running or queued / not registered */
INFORM_REGISTERING, /* member: initial state set at registration time */
INFORM_MEMBER, /* member joined, or group registered with the SA */
INFORM_BUSY, /* group: work handler is queued or running */
INFORM_ERROR /* group: set by inform_groups_lost(); member: failed */
};
struct inform_member;
/* All subscribers on one port that share a trap number. */
struct inform_group {
u16 trap_number; /* key in inform_port.table */
struct rb_node node;
struct inform_port *port;
spinlock_t lock; /* held around the lists, members and state below */
struct work_struct work; /* runs inform_work_handler() */
struct list_head pending_list; /* members waiting to be registered */
struct list_head active_list; /* members that have joined */
struct list_head notice_list; /* queued inform_notice entries */
struct inform_member *last_join; /* member the last send_reg() was for */
int members; /* number of joined members */
enum inform_state join_state; /* State relative to SA */
atomic_t refcount; /* dropped by release_group(); last drop frees */
enum inform_state state; /* IDLE, BUSY or ERROR */
struct ib_sa_query *query; /* filled in by ib_sa_informinfo_query() */
int query_id; /* non-negative id returned by the last query */
};
/* One subscriber; users only ever see the embedded info structure. */
struct inform_member {
struct ib_inform_info info; /* handle returned to and passed back by users */
struct ib_sa_client *client; /* reference taken at registration time */
struct inform_group *group;
struct list_head list; /* entry on the group's pending or active list */
enum inform_state state;
atomic_t refcount; /* starts at 1; raised around user callbacks */
struct completion comp; /* completed when refcount drops to zero */
};
/* A copy of a received notice, queued on inform_group.notice_list. */
struct inform_notice {
struct list_head list;
struct ib_sa_notice notice; /* copied by value in notice_dispatch() */
};
static void reg_handler(int status, struct ib_sa_inform *inform,
void *context);
static void unreg_handler(int status, struct ib_sa_inform *inform,
void *context);
/*
 * Look up the group for trap_number in the port's red-black tree.
 * Returns the matching group, or NULL when there is none.  The callers in
 * this file hold port->lock.
 */
static struct inform_group *inform_find(struct inform_port *port,
					u16 trap_number)
{
	struct rb_node *cur;
	struct inform_group *candidate;

	for (cur = port->table.rb_node; cur != NULL; ) {
		candidate = rb_entry(cur, struct inform_group, node);
		if (trap_number == candidate->trap_number)
			return candidate;
		/* Smaller keys live in the left subtree, larger in the right. */
		cur = (trap_number < candidate->trap_number) ?
		    cur->rb_left : cur->rb_right;
	}
	return NULL;
}
/*
 * Insert group into the port's tree, keyed by trap_number.
 * Returns NULL when the group was inserted, or the group already present
 * under that trap number (in which case nothing is modified).
 */
static struct inform_group *inform_insert(struct inform_port *port,
					  struct inform_group *group)
{
	struct rb_node **slot = &port->table.rb_node;
	struct rb_node *above = NULL;

	while (*slot != NULL) {
		struct inform_group *existing;

		above = *slot;
		existing = rb_entry(above, struct inform_group, node);
		if (group->trap_number == existing->trap_number)
			return existing;
		if (group->trap_number < existing->trap_number)
			slot = &above->rb_left;
		else
			slot = &above->rb_right;
	}

	rb_link_node(&group->node, above, slot);
	rb_insert_color(&group->node, &port->table);
	return NULL;
}
/* Drop one port reference; the final drop completes port->comp. */
static void deref_port(struct inform_port *port)
{
	if (!atomic_dec_and_test(&port->refcount))
		return;
	complete(&port->comp);
}
/*
 * Drop one group reference.  The final drop unlinks the group from the
 * port's tree under port->lock, frees it and releases the port reference
 * that the group was holding.
 */
static void release_group(struct inform_group *group)
{
	struct inform_port *owner = group->port;
	unsigned long irq_flags;
	int last;

	spin_lock_irqsave(&owner->lock, irq_flags);
	last = atomic_dec_and_test(&group->refcount);
	if (last)
		rb_erase(&group->node, &owner->table);
	spin_unlock_irqrestore(&owner->lock, irq_flags);

	if (!last)
		return;
	kfree(group);
	deref_port(owner);
}
/* Drop one member reference; the final drop completes member->comp. */
static void deref_member(struct inform_member *member)
{
	if (!atomic_dec_and_test(&member->refcount))
		return;
	complete(&member->comp);
}
/*
 * Put member on its group's pending list and, if the group is idle, mark
 * it busy, take a group reference for the work item and queue the work.
 */
static void queue_reg(struct inform_member *member)
{
	struct inform_group *grp = member->group;
	unsigned long irq_flags;

	spin_lock_irqsave(&grp->lock, irq_flags);
	list_add(&member->list, &grp->pending_list);
	if (grp->state != INFORM_IDLE)
		goto unlock;	/* a handler is already queued or running */
	grp->state = INFORM_BUSY;
	atomic_inc(&grp->refcount);
	queue_work(inform_wq, &grp->work);
unlock:
	spin_unlock_irqrestore(&grp->lock, irq_flags);
}
static int send_reg(struct inform_group *group, struct inform_member *member)
{
struct inform_port *port = group->port;
struct ib_sa_inform inform;
int ret;
memset(&inform, 0, sizeof inform);
inform.lid_range_begin = cpu_to_be16(0xFFFF);
inform.is_generic = 1;
inform.subscribe = 1;
inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
inform.trap.generic.resp_time = 19;
inform.trap.generic.producer_type =
cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
group->last_join = member;
ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
port->port_num, &inform, 3000, GFP_KERNEL,
reg_handler, group,&group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
}
static int send_unreg(struct inform_group *group)
{
struct inform_port *port = group->port;
struct ib_sa_inform inform;
int ret;
memset(&inform, 0, sizeof inform);
inform.lid_range_begin = cpu_to_be16(0xFFFF);
inform.is_generic = 1;
inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
inform.trap.generic.trap_num = cpu_to_be16(group->trap_number);
inform.trap.generic.qpn = IB_QP1;
inform.trap.generic.resp_time = 19;
inform.trap.generic.producer_type =
cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
port->port_num, &inform, 3000, GFP_KERNEL,
unreg_handler, group, &group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
}
/*
 * Promote member to the group's active list and count it as joined.
 * The caller in this file holds group->lock.
 */
static void join_group(struct inform_group *group, struct inform_member *member)
{
	list_move(&member->list, &group->active_list);
	group->members++;
	member->state = INFORM_MEMBER;
}
static int fail_join(struct inform_group *group, struct inform_member *member,
int status)
{
spin_lock_irq(&group->lock);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
return member->info.callback(status, &member->info, NULL);
}
/*
 * Tear down a group that was put into INFORM_ERROR: every active member is
 * unlinked, marked INFORM_ERROR and told -ENETRESET through its callback.
 * On exit the group is no longer registered with the SA
 * (join_state = INFORM_IDLE) and is back in INFORM_BUSY.
 */
static void process_group_error(struct inform_group *group)
{
struct inform_member *member;
int ret;
spin_lock_irq(&group->lock);
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct inform_member, list);
/* Keep the member alive while its callback runs without the lock. */
atomic_inc(&member->refcount);
list_del_init(&member->list);
group->members--;
member->state = INFORM_ERROR;
spin_unlock_irq(&group->lock);
ret = member->info.callback(-ENETRESET, &member->info, NULL);
deref_member(member);
/* A non-zero callback return asks us to unregister the member. */
if (ret)
ib_sa_unregister_inform_info(&member->info);
spin_lock_irq(&group->lock);
}
group->join_state = INFORM_IDLE;
group->state = INFORM_BUSY;
spin_unlock_irq(&group->lock);
}
/*
 * Report a notice to all active subscribers.  We use a temporary list to
 * handle unsubscription requests while the notice is being reported, which
 * avoids holding the group lock while in the user's callback.
 *
 * Each member is moved back to the active list before its callback runs,
 * and a non-zero callback return unregisters that member.
 */
static void process_notice(struct inform_group *group,
struct inform_notice *info_notice)
{
struct inform_member *member;
struct list_head list;
int ret;
INIT_LIST_HEAD(&list);
spin_lock_irq(&group->lock);
/* Take the whole active list; members are handed back one at a time. */
list_splice_init(&group->active_list, &list);
while (!list_empty(&list)) {
member = list_entry(list.next, struct inform_member, list);
/* Keep the member alive while its callback runs without the lock. */
atomic_inc(&member->refcount);
list_move(&member->list, &group->active_list);
spin_unlock_irq(&group->lock);
ret = member->info.callback(0, &member->info,
&info_notice->notice);
deref_member(member);
if (ret)
ib_sa_unregister_inform_info(&member->info);
spin_lock_irq(&group->lock);
}
spin_unlock_irq(&group->lock);
}
/*
 * Work function of an inform_group; also called directly from the SA query
 * handlers and from notice_dispatch().
 *
 * Loops while the group is in INFORM_ERROR, has queued notices or has
 * pending members, handling one item per pass in that priority order and
 * dropping the group lock around callbacks and SA queries.  When nothing is
 * left it either sends an unregister to the SA (no members remain but the
 * group is still registered) or marks the group idle and drops one group
 * reference via release_group().
 */
static void inform_work_handler(struct work_struct *work)
{
struct inform_group *group;
struct inform_member *member;
struct ib_inform_info *info;
struct inform_notice *info_notice;
int status, ret;
group = container_of(work, typeof(*group), work);
retest:
spin_lock_irq(&group->lock);
while (!list_empty(&group->pending_list) ||
!list_empty(&group->notice_list) ||
(group->state == INFORM_ERROR)) {
if (group->state == INFORM_ERROR) {
spin_unlock_irq(&group->lock);
process_group_error(group);
goto retest;
}
if (!list_empty(&group->notice_list)) {
info_notice = list_entry(group->notice_list.next,
struct inform_notice, list);
list_del(&info_notice->list);
spin_unlock_irq(&group->lock);
process_notice(group, info_notice);
kfree(info_notice);
goto retest;
}
/* Only pending registrations are left: take the first one. */
member = list_entry(group->pending_list.next,
struct inform_member, list);
info = &member->info;
atomic_inc(&member->refcount);
if (group->join_state == INFORM_MEMBER) {
/* Group already registered with the SA: join immediately. */
join_group(group, member);
spin_unlock_irq(&group->lock);
ret = info->callback(0, info, NULL);
} else {
spin_unlock_irq(&group->lock);
status = send_reg(group, member);
if (!status) {
/* Query issued; reg_handler() calls back into this function. */
deref_member(member);
return;
}
ret = fail_join(group, member, status);
}
deref_member(member);
/* A non-zero callback return asks us to unregister the member. */
if (ret)
ib_sa_unregister_inform_info(&member->info);
spin_lock_irq(&group->lock);
}
if (!group->members && (group->join_state == INFORM_MEMBER)) {
group->join_state = INFORM_IDLE;
spin_unlock_irq(&group->lock);
/* On success unreg_handler() continues; on failure re-evaluate now. */
if (send_unreg(group))
goto retest;
} else {
group->state = INFORM_IDLE;
spin_unlock_irq(&group->lock);
release_group(group);
}
}
/*
 * Report a failed registration to the member that requested it, provided
 * that member is still the first entry of the pending list.  If its
 * callback returns non-zero the member is unregistered.
 */
static void process_join_error(struct inform_group *group, int status)
{
	struct inform_member *head;
	int rc;

	spin_lock_irq(&group->lock);
	head = list_entry(group->pending_list.next,
	    struct inform_member, list);
	if (group->last_join != head) {
		/* The requester is no longer at the head: nothing to report. */
		spin_unlock_irq(&group->lock);
		return;
	}
	atomic_inc(&head->refcount);
	list_del_init(&head->list);
	spin_unlock_irq(&group->lock);

	rc = head->info.callback(status, &head->info, NULL);
	deref_member(head);
	if (rc)
		ib_sa_unregister_inform_info(&head->info);
}
/*
 * Completion callback of the query issued by send_reg().  A zero status
 * marks the group as registered with the SA; any other status is reported
 * to the requesting member.  Either way the work handler is re-run.
 */
static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
{
	struct inform_group *grp = context;

	if (!status)
		grp->join_state = INFORM_MEMBER;
	else
		process_join_error(grp, status);

	inform_work_handler(&grp->work);
}
/*
 * Completion callback of the query issued by send_unreg().  The status and
 * the returned record are ignored; the work handler is simply re-run.
 */
static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
{
struct inform_group *group = context;
inform_work_handler(&group->work);
}
/*
 * Queue a copy of a received notice on the group subscribed to its trap
 * number and, if that group is idle, process it right away in the caller's
 * context.
 *
 * Returns 0 when the notice was queued or nobody is subscribed, -ENOMEM
 * when the copy could not be allocated.
 */
int notice_dispatch(struct ib_device *device, u8 port_num,
		    struct ib_sa_notice *notice)
{
	struct inform_device *idev;
	struct inform_port *iport;
	struct inform_group *grp;
	struct inform_notice *entry;
	int run_now;

	idev = ib_get_client_data(device, &inform_client);
	if (!idev)
		return 0; /* No one to give notice to. */

	iport = &idev->port[port_num - idev->start_port];
	spin_lock_irq(&iport->lock);
	grp = inform_find(iport, __be16_to_cpu(notice->trap.
	    generic.trap_num));
	if (grp)
		atomic_inc(&grp->refcount);
	spin_unlock_irq(&iport->lock);
	if (!grp)
		return 0;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry) {
		release_group(grp);
		return -ENOMEM;
	}
	entry->notice = *notice;

	spin_lock_irq(&grp->lock);
	list_add(&entry->list, &grp->notice_list);
	run_now = (grp->state == INFORM_IDLE);
	if (run_now)
		grp->state = INFORM_BUSY;
	spin_unlock_irq(&grp->lock);

	/*
	 * An idle group is processed here and the handler consumes our
	 * reference; otherwise the handler that is already active picks the
	 * notice up and the reference is dropped now.
	 */
	if (run_now)
		inform_work_handler(&grp->work);
	else
		release_group(grp);
	return 0;
}
/*
 * Return the group for trap_number on port with one extra reference,
 * creating it if necessary.  A newly inserted group also takes a port
 * reference.  Returns NULL when a new group could not be allocated.
 */
static struct inform_group *acquire_group(struct inform_port *port,
					  u16 trap_number, gfp_t gfp_mask)
{
	struct inform_group *grp, *fresh, *winner;
	unsigned long irq_flags;

	spin_lock_irqsave(&port->lock, irq_flags);
	grp = inform_find(port, trap_number);
	if (grp) {
		atomic_inc(&grp->refcount);
		spin_unlock_irqrestore(&port->lock, irq_flags);
		return grp;
	}
	spin_unlock_irqrestore(&port->lock, irq_flags);

	/* Not found: build a candidate with the port lock released. */
	fresh = kzalloc(sizeof(*fresh), gfp_mask);
	if (!fresh)
		return NULL;
	fresh->port = port;
	fresh->trap_number = trap_number;
	INIT_LIST_HEAD(&fresh->pending_list);
	INIT_LIST_HEAD(&fresh->active_list);
	INIT_LIST_HEAD(&fresh->notice_list);
	INIT_WORK(&fresh->work, inform_work_handler);
	spin_lock_init(&fresh->lock);

	spin_lock_irqsave(&port->lock, irq_flags);
	winner = inform_insert(port, fresh);
	if (winner) {
		/* Someone else inserted the same trap number meanwhile. */
		kfree(fresh);
		grp = winner;
	} else {
		atomic_inc(&port->refcount);
		grp = fresh;
	}
	atomic_inc(&grp->refcount);
	spin_unlock_irqrestore(&port->lock, irq_flags);
	return grp;
}
/*
* We serialize all join requests to a single group to make our lives much
* easier. Otherwise, two users could try to join the same group
* simultaneously, with different configurations, one could leave while the
* join is in progress, etc., which makes locking around error recovery
* difficult.
*/
/*
 * Subscribe to notices carrying trap_number on the given device port.
 *
 * The request is queued on the group for that trap number; callback is
 * invoked with the outcome and later with each notice.  Returns the info
 * handle to pass to ib_sa_unregister_inform_info(), ERR_PTR(-ENODEV) when
 * this client has no data on the device, or ERR_PTR(-ENOMEM) when an
 * allocation fails.
 */
struct ib_inform_info *
ib_sa_register_inform_info(struct ib_sa_client *client,
			   struct ib_device *device, u8 port_num,
			   u16 trap_number, gfp_t gfp_mask,
			   int (*callback)(int status,
					   struct ib_inform_info *info,
					   struct ib_sa_notice *notice),
			   void *context)
{
	struct inform_device *idev;
	struct inform_member *entry;
	struct ib_inform_info *handle;

	idev = ib_get_client_data(device, &inform_client);
	if (!idev)
		return ERR_PTR(-ENODEV);

	entry = kzalloc(sizeof(*entry), gfp_mask);
	if (!entry)
		return ERR_PTR(-ENOMEM);

	ib_sa_client_get(client);
	entry->client = client;
	entry->info.trap_number = trap_number;
	entry->info.callback = callback;
	entry->info.context = context;
	init_completion(&entry->comp);
	atomic_set(&entry->refcount, 1);
	entry->state = INFORM_REGISTERING;

	entry->group = acquire_group(&idev->port[port_num - idev->start_port],
	    trap_number, gfp_mask);
	if (!entry->group) {
		ib_sa_client_put(entry->client);
		kfree(entry);
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * The user will get the info structure in their callback.  They
	 * could then free the info structure before we can return from
	 * this routine.  So we save the pointer to return before queuing
	 * any callback.
	 */
	handle = &entry->info;
	queue_reg(entry);
	return handle;
}
EXPORT_SYMBOL(ib_sa_register_inform_info);
/*
 * Cancel a subscription obtained from ib_sa_register_inform_info().
 *
 * Removes the member from its group, lets the work handler re-evaluate the
 * group if it was idle, then waits until every outstanding reference on the
 * member has been dropped before freeing it.
 */
void ib_sa_unregister_inform_info(struct ib_inform_info *info)
{
struct inform_member *member;
struct inform_group *group;
member = container_of(info, struct inform_member, info);
group = member->group;
spin_lock_irq(&group->lock);
if (member->state == INFORM_MEMBER)
group->members--;
list_del_init(&member->list);
if (group->state == INFORM_IDLE) {
group->state = INFORM_BUSY;
spin_unlock_irq(&group->lock);
/* Continue to hold reference on group until callback */
queue_work(inform_wq, &group->work);
} else {
spin_unlock_irq(&group->lock);
release_group(group);
}
/* Drop the initial reference, then wait for any callback still running. */
deref_member(member);
wait_for_completion(&member->comp);
ib_sa_client_put(member->client);
kfree(member);
}
EXPORT_SYMBOL(ib_sa_unregister_inform_info);
/*
 * Put every group on the port into INFORM_ERROR.  Groups that were idle
 * additionally get a reference and have their work item queued so the
 * error is processed.
 */
static void inform_groups_lost(struct inform_port *port)
{
	struct rb_node *cur;
	unsigned long irq_flags;

	spin_lock_irqsave(&port->lock, irq_flags);
	cur = rb_first(&port->table);
	while (cur) {
		struct inform_group *grp;

		grp = rb_entry(cur, struct inform_group, node);
		spin_lock(&grp->lock);
		if (grp->state == INFORM_IDLE) {
			atomic_inc(&grp->refcount);
			queue_work(inform_wq, &grp->work);
		}
		grp->state = INFORM_ERROR;
		spin_unlock(&grp->lock);
		cur = rb_next(cur);
	}
	spin_unlock_irqrestore(&port->lock, irq_flags);
}
/*
 * Asynchronous IB event hook.  Port-error, LID-change, SM-change and
 * client-reregister events invalidate all groups of the affected port;
 * every other event is ignored.
 */
static void inform_event_handler(struct ib_event_handler *handler,
				 struct ib_event *event)
{
	struct inform_device *idev =
	    container_of(handler, struct inform_device, event_handler);

	if (event->event == IB_EVENT_PORT_ERR ||
	    event->event == IB_EVENT_LID_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE ||
	    event->event == IB_EVENT_CLIENT_REREGISTER) {
		inform_groups_lost(&idev->port[event->element.port_num -
		    idev->start_port]);
	}
}
/*
 * ib_client add hook.  For devices using the IB transport, allocate the
 * per-device state, initialise one inform_port per port, publish the state
 * as client data and register the event handler.  Other transports and
 * allocation failures leave the device without client data.
 */
static void inform_add_one(struct ib_device *device)
{
	struct inform_device *idev;
	int idx, last;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	idev = kmalloc(sizeof *idev +
	    device->phys_port_cnt * sizeof(struct inform_port), GFP_KERNEL);
	if (!idev)
		return;

	/* A switch exposes only port 0; other nodes use ports 1..N. */
	if (device->node_type != RDMA_NODE_IB_SWITCH) {
		idev->start_port = 1;
		idev->end_port = device->phys_port_cnt;
	} else {
		idev->start_port = 0;
		idev->end_port = 0;
	}

	last = idev->end_port - idev->start_port;
	for (idx = 0; idx <= last; idx++) {
		struct inform_port *p = &idev->port[idx];

		p->dev = idev;
		p->port_num = idev->start_port + idx;
		spin_lock_init(&p->lock);
		p->table = RB_ROOT;
		init_completion(&p->comp);
		atomic_set(&p->refcount, 1);
	}

	idev->device = device;
	ib_set_client_data(device, &inform_client, idev);
	INIT_IB_EVENT_HANDLER(&idev->event_handler, device, inform_event_handler);
	ib_register_event_handler(&idev->event_handler);
}
static void inform_remove_one(struct ib_device *device)
{
struct inform_device *dev;
struct inform_port *port;
int i;
dev = ib_get_client_data(device, &inform_client);
if (!dev)
return;
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(inform_wq);
for (i = 0; i <= dev->end_port - dev->start_port; i++) {
port = &dev->port[i];
deref_port(port);
wait_for_completion(&port->comp);
}
kfree(dev);
}
/*
 * Module set-up: create the work queue, register the SA client and the IB
 * client.  Returns 0 on success, -ENOMEM when the work queue cannot be
 * created, or the error from ib_register_client() after undoing the
 * earlier steps.
 */
int notice_init(void)
{
	int rc;

	inform_wq = create_singlethread_workqueue("ib_inform");
	if (!inform_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);
	rc = ib_register_client(&inform_client);
	if (!rc)
		return 0;

	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(inform_wq);
	return rc;
}
/* Module tear-down: undo notice_init() in reverse order. */
void notice_cleanup(void)
{
ib_unregister_client(&inform_client);
ib_sa_unregister_client(&sa_client);
destroy_workqueue(inform_wq);
}

View File

@ -31,6 +31,7 @@
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/string.h>
#include <rdma/ib_pack.h>

View File

@ -0,0 +1,461 @@
/*
* Copyright (c) 2013, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_peer_mem.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
/* Held around changes to peer_memory_list and num_registered_peers. */
static DEFINE_MUTEX(peer_memory_mutex);
/* Registered ib_peer_memory_client entries, linked via core_peer_list. */
static LIST_HEAD(peer_memory_list);
/* Number of entries currently on peer_memory_list. */
static int num_registered_peers;
/* This code uses sysfs, which is not supported by FreeBSD.
 * It will be added to sysctl in the future. */
#if 0
static struct kobject *peers_kobj;
static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj);
/*
 * sysfs show handler: writes the peer's version string plus a newline to
 * buf and returns its length, or 0 if kobj matches no registered peer.
 */
static ssize_t version_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	struct ib_peer_memory_client *client = get_peer_by_kobj(kobj);

	/* not found - nothing is returned */
	if (!client)
		return 0;

	sprintf(buf, "%s\n", client->peer_mem->version);
	return strlen(buf);
}
/*
 * sysfs show handler: writes the peer's stats.num_alloc_mrs counter plus a
 * newline to buf and returns its length, or 0 if kobj matches no
 * registered peer.
 */
static ssize_t num_alloc_mrs_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	struct ib_peer_memory_client *client = get_peer_by_kobj(kobj);

	/* not found - nothing is returned */
	if (!client)
		return 0;

	sprintf(buf, "%lu\n", client->stats.num_alloc_mrs);
	return strlen(buf);
}
/*
 * sysfs show handler: writes the peer's stats.num_reg_pages counter plus a
 * newline to buf and returns its length, or 0 if kobj matches no
 * registered peer.
 */
static ssize_t num_reg_pages_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	struct ib_peer_memory_client *client = get_peer_by_kobj(kobj);

	/* not found - nothing is returned */
	if (!client)
		return 0;

	sprintf(buf, "%lu\n", client->stats.num_reg_pages);
	return strlen(buf);
}
/*
 * sysfs show handler: writes the peer's stats.num_dereg_pages counter plus
 * a newline to buf and returns its length, or 0 if kobj matches no
 * registered peer.
 */
static ssize_t num_dereg_pages_show(struct kobject *kobj,
				    struct kobj_attribute *attr, char *buf)
{
	struct ib_peer_memory_client *client = get_peer_by_kobj(kobj);

	/* not found - nothing is returned */
	if (!client)
		return 0;

	sprintf(buf, "%lu\n", client->stats.num_dereg_pages);
	return strlen(buf);
}
/*
 * sysfs show handler: writes the peer's stats.num_free_callbacks counter
 * plus a newline to buf and returns its length, or 0 if kobj matches no
 * registered peer.
 */
static ssize_t num_free_callbacks_show(struct kobject *kobj,
				       struct kobj_attribute *attr, char *buf)
{
	struct ib_peer_memory_client *client = get_peer_by_kobj(kobj);

	/* not found - nothing is returned */
	if (!client)
		return 0;

	sprintf(buf, "%lu\n", client->stats.num_free_callbacks);
	return strlen(buf);
}
/* Read-only attributes; __ATTR_RO(x) binds each one to x_show() above. */
static struct kobj_attribute version_attr = __ATTR_RO(version);
static struct kobj_attribute num_alloc_mrs = __ATTR_RO(num_alloc_mrs);
static struct kobj_attribute num_reg_pages = __ATTR_RO(num_reg_pages);
static struct kobj_attribute num_dereg_pages = __ATTR_RO(num_dereg_pages);
static struct kobj_attribute num_free_callbacks = __ATTR_RO(num_free_callbacks);
/* NULL-terminated attribute list installed by create_peer_sysfs(). */
static struct attribute *peer_mem_attrs[] = {
&version_attr.attr,
&num_alloc_mrs.attr,
&num_reg_pages.attr,
&num_dereg_pages.attr,
&num_free_callbacks.attr,
NULL,
};
#endif
#if 0
/*
 * Drop the peer's kobject and, once no peers remain registered, the shared
 * parent kobject as well.
 */
static void destroy_peer_sysfs(struct ib_peer_memory_client *ib_peer_client)
{
	kobject_put(ib_peer_client->kobj);
	if (num_registered_peers == 0)
		kobject_put(peers_kobj);
}
/* This code uses sysfs, which is not supported by FreeBSD.
 * It will be added to sysctl in the future. */
/*
 * Create the sysfs directory and attribute files for one peer client.  The
 * shared "memory_peers" parent is created when the first peer registers.
 * Returns 0 on success, -ENOMEM when the parent cannot be created, -EINVAL
 * when the peer's own kobject cannot be created, or the error from
 * sysfs_create_group().
 */
static int create_peer_sysfs(struct ib_peer_memory_client *ib_peer_client)
{
int ret;
if (!num_registered_peers) {
/* creating under /sys/kernel/mm */
peers_kobj = kobject_create_and_add("memory_peers", mm_kobj);
if (!peers_kobj)
return -ENOMEM;
}
ib_peer_client->peer_mem_attr_group.attrs = peer_mem_attrs;
/* The directory was already created explicitly to get its kernel object for further usage */
ib_peer_client->peer_mem_attr_group.name = NULL;
ib_peer_client->kobj = kobject_create_and_add(ib_peer_client->peer_mem->name,
peers_kobj);
if (!ib_peer_client->kobj) {
ret = -EINVAL;
goto free;
}
/* Create the files associated with this kobject */
ret = sysfs_create_group(ib_peer_client->kobj,
&ib_peer_client->peer_mem_attr_group);
if (ret)
goto peer_free;
return 0;
peer_free:
kobject_put(ib_peer_client->kobj);
free:
/* Release the parent only if this call was the one that created it. */
if (!num_registered_peers)
kobject_put(peers_kobj);
return ret;
}
#endif
/*
 * Invalidation callback handed to peer clients at registration time.
 *
 * reg_handle is the ib_peer_memory_client returned by registration and
 * core_context is the ticket key previously issued for the mapping.  Under
 * the client lock the ticket is looked up; if it is unknown nothing is
 * done.  If the invalidation context has no handler yet it is only flagged
 * as peer_invalidated.  Otherwise the handler is invoked and, when an
 * invalidation is in flight, this function waits for its completion with
 * the lock released before freeing the context.  Always returns 0.
 */
static int ib_invalidate_peer_memory(void *reg_handle,
void *core_context)
{
struct ib_peer_memory_client *ib_peer_client =
(struct ib_peer_memory_client *)reg_handle;
struct invalidation_ctx *invalidation_ctx;
struct core_ticket *core_ticket;
/* Cleared when the lock has already been released before "out". */
int need_unlock = 1;
mutex_lock(&ib_peer_client->lock);
ib_peer_client->stats.num_free_callbacks += 1;
core_ticket = ib_peer_search_context(ib_peer_client,
(unsigned long)core_context);
if (!core_ticket)
goto out;
invalidation_ctx = (struct invalidation_ctx *)core_ticket->context;
/* If context not ready yet mark to be invalidated */
if (!invalidation_ctx->func) {
invalidation_ctx->peer_invalidated = 1;
goto out;
}
invalidation_ctx->func(invalidation_ctx->cookie,
invalidation_ctx->umem, 0, 0);
if (invalidation_ctx->inflight_invalidation) {
/* init the completion to wait on before letting other thread to run */
init_completion(&invalidation_ctx->comp);
mutex_unlock(&ib_peer_client->lock);
need_unlock = 0;
wait_for_completion(&invalidation_ctx->comp);
}
kfree(invalidation_ctx);
out:
if (need_unlock)
mutex_unlock(&ib_peer_client->lock);
return 0;
}
/*
 * Allocate a ticket that maps a freshly issued numeric key to context and
 * append it to the client's ticket list.
 *
 * Access to the peer client is under its lock - no extra lock is needed.
 *
 * Returns the new key, or 0 when the ticket cannot be allocated.  Keys are
 * produced by pre-incrementing last_ticket, so 0 is not handed out for a
 * successful insertion (assuming last_ticket starts at zero and does not
 * wrap); looking up or removing key 0 therefore simply finds nothing.
 */
unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client,
				     void *context)
{
	struct core_ticket *core_ticket;

	core_ticket = kzalloc(sizeof(*core_ticket), GFP_KERNEL);
	if (!core_ticket)
		return 0;	/* do not consume a key on failure */

	ib_peer_client->last_ticket++;
	core_ticket->context = context;
	core_ticket->key = ib_peer_client->last_ticket;
	list_add_tail(&core_ticket->ticket_list,
	    &ib_peer_client->core_ticket_list);
	return core_ticket->key;
}
int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client,
unsigned long key)
{
struct core_ticket *core_ticket, *tmp;
list_for_each_entry_safe(core_ticket, tmp, &ib_peer_client->core_ticket_list,
ticket_list) {
if (core_ticket->key == key) {
list_del(&core_ticket->ticket_list);
kfree(core_ticket);
return 0;
}
}
return 1;
}
struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
unsigned long key)
{
struct core_ticket *core_ticket, *tmp;
list_for_each_entry_safe(core_ticket, tmp, &ib_peer_client->core_ticket_list,
ticket_list) {
if (core_ticket->key == key)
return core_ticket;
}
return NULL;
}
/*
 * Verify that a peer client supplies every mandatory callback.
 *
 * Each entry of the table records the offset of one function pointer in
 * struct peer_memory_client together with its name.  Returns 0 when all of
 * them are non-NULL; otherwise logs the first missing one and returns
 * -EINVAL.
 */
static int ib_memory_peer_check_mandatory(struct peer_memory_client
					  *peer_client)
{
#define PEER_MEM_MANDATORY_FUNC(x) {\
	offsetof(struct peer_memory_client, x), #x }

	static const struct {
		size_t offset;
		const char *name;
	} mandatory_table[] = {
		PEER_MEM_MANDATORY_FUNC(acquire),
		PEER_MEM_MANDATORY_FUNC(get_pages),
		PEER_MEM_MANDATORY_FUNC(put_pages),
		PEER_MEM_MANDATORY_FUNC(get_page_size),
		PEER_MEM_MANDATORY_FUNC(dma_map),
		PEER_MEM_MANDATORY_FUNC(dma_unmap)
	};
#undef PEER_MEM_MANDATORY_FUNC
	/* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
	char *base = (char *)peer_client;
	size_t i;

	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
		if (!*(void **)(base + mandatory_table[i].offset)) {
			printk(KERN_WARNING "Peer memory %s is missing mandatory function %s\n",
			    peer_client->name, mandatory_table[i].name);
			return -EINVAL;
		}
	}
	return 0;
}
/*
 * Register a peer memory client with the IB core.
 *
 * peer_client must provide all mandatory callbacks.  On success the new
 * client is appended to peer_memory_list, *invalidate_callback is set to
 * the core's invalidation entry point, and an opaque registration handle
 * (to be passed to ib_unregister_peer_memory_client()) is returned.
 * Returns NULL when a mandatory callback is missing or set-up fails.
 */
void *ib_register_peer_memory_client(struct peer_memory_client *peer_client,
				     invalidate_peer_memory *invalidate_callback)
{
	struct ib_peer_memory_client *ib_peer_client = NULL;

	mutex_lock(&peer_memory_mutex);
	if (ib_memory_peer_check_mandatory(peer_client))
		goto out;

	ib_peer_client = kzalloc(sizeof(*ib_peer_client), GFP_KERNEL);
	if (!ib_peer_client)
		goto out;
	ib_peer_client->peer_mem = peer_client;

	INIT_LIST_HEAD(&ib_peer_client->core_ticket_list);
	mutex_init(&ib_peer_client->lock);
#ifdef __FreeBSD__
	ib_peer_client->holdcount = 0;
	ib_peer_client->needwakeup = 0;
	cv_init(&ib_peer_client->peer_cv, "ibprcl");
#else
	/*
	 * Clean up inline: the shared "free" label below is compiled out
	 * together with the sysfs code, so it cannot be a goto target here.
	 */
	if (init_srcu_struct(&ib_peer_client->peer_srcu)) {
		kfree(ib_peer_client);
		ib_peer_client = NULL;
		goto out;
	}
#endif
#if 0
	if (create_peer_sysfs(ib_peer_client))
		goto free;
#endif
	*invalidate_callback = ib_invalidate_peer_memory;
	list_add_tail(&ib_peer_client->core_peer_list, &peer_memory_list);
	num_registered_peers++;
#if 0
	goto out;
free:
	kfree(ib_peer_client);
	ib_peer_client = NULL;
#endif
out:
	mutex_unlock(&peer_memory_mutex);
	return ib_peer_client;
}
EXPORT_SYMBOL(ib_register_peer_memory_client);
/*
 * Unregister a peer memory client.  reg_handle is the value returned by
 * ib_register_peer_memory_client().  The client is unlinked first so no new
 * user can pick it, then this function waits until existing users are gone
 * (hold count reaching zero on FreeBSD, SRCU grace period otherwise) before
 * freeing it.
 */
void ib_unregister_peer_memory_client(void *reg_handle)
{
struct ib_peer_memory_client *ib_peer_client =
(struct ib_peer_memory_client *)reg_handle;
mutex_lock(&peer_memory_mutex);
/* remove from list to prevent future core clients usage as it goes down */
list_del(&ib_peer_client->core_peer_list);
#ifdef __FreeBSD__
/* Sleep on peer_cv, with the mutex's sx lock as interlock, until idle. */
while (ib_peer_client->holdcount != 0) {
ib_peer_client->needwakeup = 1;
cv_wait(&ib_peer_client->peer_cv, &peer_memory_mutex.sx);
}
cv_destroy(&ib_peer_client->peer_cv);
#else
mutex_unlock(&peer_memory_mutex);
/* peer memory can't go down while there are active clients */
synchronize_srcu(&ib_peer_client->peer_srcu);
cleanup_srcu_struct(&ib_peer_client->peer_srcu);
mutex_lock(&peer_memory_mutex);
#endif
num_registered_peers--;
/* This code uses sysfs, which is not supported by FreeBSD.
 * It will be added to sysctl in the future. */
#if 0
destroy_peer_sysfs(ib_peer_client);
#endif
mutex_unlock(&peer_memory_mutex);
kfree(ib_peer_client);
}
EXPORT_SYMBOL(ib_unregister_peer_memory_client);
/* This code uses sysfs, which is not supported by FreeBSD.
 * It will be added to sysctl in the future. */
#if 0
static struct ib_peer_memory_client *get_peer_by_kobj(void *kobj)
{
struct ib_peer_memory_client *ib_peer_client;
mutex_lock(&peer_memory_mutex);
list_for_each_entry(ib_peer_client, &peer_memory_list, core_peer_list) {
if (ib_peer_client->kobj == kobj)
goto found;
}
ib_peer_client = NULL;
found:
mutex_unlock(&peer_memory_mutex);
return ib_peer_client;
}
#endif
/*
 * Find the first registered peer client whose acquire() callback returns 1
 * for the address range [addr, addr + size).
 *
 * The matching client is pinned before the list mutex is dropped: its hold
 * count is raised on FreeBSD, otherwise an SRCU read section is entered and
 * its key stored in *srcu_key.  Returns the client, or NULL when no peer
 * claims the range.
 */
struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
						 size_t size, void **peer_client_context,
						 int *srcu_key)
{
	struct ib_peer_memory_client *iter, *chosen = NULL;

	mutex_lock(&peer_memory_mutex);
	list_for_each_entry(iter, &peer_memory_list, core_peer_list) {
		if (iter->peer_mem->acquire(addr, size,
		    context->peer_mem_private_data,
		    context->peer_mem_name,
		    peer_client_context) == 1) {
			chosen = iter;
			break;
		}
	}

	if (chosen) {
#ifdef __FreeBSD__
		chosen->holdcount++;
#else
		*srcu_key = srcu_read_lock(&chosen->peer_srcu);
#endif
	}
	mutex_unlock(&peer_memory_mutex);
	return chosen;
}
EXPORT_SYMBOL(ib_get_peer_client);
/*
 * Release a peer client obtained from ib_get_peer_client().
 *
 * Calls the peer's optional release() callback with peer_client_context and
 * then drops the pin taken by ib_get_peer_client().
 *
 * On FreeBSD the hold count is changed under peer_memory_mutex, the same
 * lock that ib_get_peer_client() holds when raising it and that
 * ib_unregister_peer_memory_client() uses as the cv_wait() interlock.
 * Updating the counter and signalling outside that lock could race with the
 * waiter between its holdcount test and its cv_wait(), losing the wakeup.
 * srcu_key is only used on the non-FreeBSD path.
 *
 * NOTE(review): callers must not already hold peer_memory_mutex and must be
 * allowed to sleep - no caller is visible in this file; confirm.
 */
void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
			void *peer_client_context,
			int srcu_key)
{
	if (ib_peer_client->peer_mem->release)
		ib_peer_client->peer_mem->release(peer_client_context);

#ifdef __FreeBSD__
	mutex_lock(&peer_memory_mutex);
	ib_peer_client->holdcount--;
	if (ib_peer_client->holdcount == 0 && ib_peer_client->needwakeup)
		cv_signal(&ib_peer_client->peer_cv);
	mutex_unlock(&peer_memory_mutex);
#else
	srcu_read_unlock(&ib_peer_client->peer_srcu, srcu_key);
#endif
}
EXPORT_SYMBOL(ib_put_peer_client);

View File

@ -48,29 +48,6 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
complete(&client->comp);
}
int ib_sa_check_selector(ib_sa_comp_mask comp_mask,
ib_sa_comp_mask selector_mask,
ib_sa_comp_mask value_mask,
u8 selector, u8 src_value, u8 dst_value);
int ib_sa_pack_attr(void *dst, void *src, int attr_id);
int ib_sa_unpack_attr(void *dst, void *src, int attr_id);
int ib_sa_path_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_path_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query);
int sa_db_init(void);
void sa_db_cleanup(void);
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
u8 method,
@ -86,20 +63,4 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
int mcast_init(void);
void mcast_cleanup(void);
int ib_sa_informinfo_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_inform *rec,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_inform *resp,
void *context),
void *context,
struct ib_sa_query **sa_query);
int notice_dispatch(struct ib_device *device, u8 port_num,
struct ib_sa_notice *notice);
int notice_init(void);
void notice_cleanup(void);
#endif /* SA_H */

View File

@ -59,12 +59,10 @@ struct ib_sa_sm_ah {
struct ib_sa_port {
struct ib_mad_agent *agent;
struct ib_mad_agent *notice_agent;
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
spinlock_t ah_lock;
u8 port_num;
struct ib_device *device;
};
struct ib_sa_device {
@ -95,14 +93,14 @@ struct ib_sa_path_query {
struct ib_sa_query sa_query;
};
struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
struct ib_sa_guidinfo_query {
void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
struct ib_sa_inform_query {
void (*callback)(int, struct ib_sa_inform *, void *);
struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context;
struct ib_sa_query sa_query;
};
@ -116,10 +114,10 @@ static struct ib_client sa_client = {
.remove = ib_sa_remove_one
};
static spinlock_t idr_lock;
static DEFINE_SPINLOCK(idr_lock);
static DEFINE_IDR(query_idr);
static spinlock_t tid_lock;
static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
@ -354,162 +352,34 @@ static const struct ib_field service_rec_table[] = {
.size_bits = 2*64 },
};
#define INFORM_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_inform, field), \
.struct_size_bytes = sizeof ((struct ib_sa_inform *) 0)->field, \
.field_name = "sa_inform:" #field
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
.field_name = "sa_guidinfo_rec:" #field
static const struct ib_field inform_table[] = {
{ INFORM_FIELD(gid),
static const struct ib_field guidinfo_rec_table[] = {
{ GUIDINFO_REC_FIELD(lid),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 128 },
{ INFORM_FIELD(lid_range_begin),
.offset_words = 4,
.offset_bits = 0,
.size_bits = 16 },
{ INFORM_FIELD(lid_range_end),
.offset_words = 4,
.offset_bits = 16,
.size_bits = 16 },
{ RESERVED,
.offset_words = 5,
.offset_bits = 0,
.size_bits = 16 },
{ INFORM_FIELD(is_generic),
.offset_words = 5,
{ GUIDINFO_REC_FIELD(block_num),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 8 },
{ INFORM_FIELD(subscribe),
.offset_words = 5,
{ GUIDINFO_REC_FIELD(res1),
.offset_words = 0,
.offset_bits = 24,
.size_bits = 8 },
{ INFORM_FIELD(type),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 16 },
{ INFORM_FIELD(trap.generic.trap_num),
.offset_words = 6,
.offset_bits = 16,
.size_bits = 16 },
{ INFORM_FIELD(trap.generic.qpn),
.offset_words = 7,
.offset_bits = 0,
.size_bits = 24 },
{ RESERVED,
.offset_words = 7,
.offset_bits = 24,
.size_bits = 3 },
{ INFORM_FIELD(trap.generic.resp_time),
.offset_words = 7,
.offset_bits = 27,
.size_bits = 5 },
{ RESERVED,
.offset_words = 8,
.offset_bits = 0,
.size_bits = 8 },
{ INFORM_FIELD(trap.generic.producer_type),
.offset_words = 8,
.offset_bits = 8,
.size_bits = 24 },
};
#define NOTICE_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_notice, field), \
.struct_size_bytes = sizeof ((struct ib_sa_notice *) 0)->field, \
.field_name = "sa_notice:" #field
static const struct ib_field notice_table[] = {
{ NOTICE_FIELD(is_generic),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 1 },
{ NOTICE_FIELD(type),
.offset_words = 0,
.offset_bits = 1,
.size_bits = 7 },
{ NOTICE_FIELD(trap.generic.producer_type),
.offset_words = 0,
.offset_bits = 8,
.size_bits = 24 },
{ NOTICE_FIELD(trap.generic.trap_num),
{ GUIDINFO_REC_FIELD(res2),
.offset_words = 1,
.offset_bits = 0,
.size_bits = 16 },
{ NOTICE_FIELD(issuer_lid),
.offset_words = 1,
.offset_bits = 16,
.size_bits = 16 },
{ NOTICE_FIELD(notice_toggle),
.size_bits = 32 },
{ GUIDINFO_REC_FIELD(guid_info_list),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 1 },
{ NOTICE_FIELD(notice_count),
.offset_words = 2,
.offset_bits = 1,
.size_bits = 15 },
{ NOTICE_FIELD(data_details),
.offset_words = 2,
.offset_bits = 16,
.size_bits = 432 },
{ NOTICE_FIELD(issuer_gid),
.offset_words = 16,
.offset_bits = 0,
.size_bits = 128 },
.size_bits = 512 },
};
/*
 * Check a selector/value pair of an SA record against a destination value.
 *
 * Returns 0 when either the selector bit or the value bit is absent from
 * @comp_mask, or when @selector is not one of IB_SA_GT / IB_SA_LT / IB_SA_EQ.
 * Otherwise returns non-zero when the comparison named by @selector does NOT
 * hold between @src_value and @dst_value:
 *   IB_SA_GT: non-zero if src_value <= dst_value
 *   IB_SA_LT: non-zero if src_value >= dst_value
 *   IB_SA_EQ: non-zero if src_value != dst_value
 */
int ib_sa_check_selector(ib_sa_comp_mask comp_mask,
			 ib_sa_comp_mask selector_mask,
			 ib_sa_comp_mask value_mask,
			 u8 selector, u8 src_value, u8 dst_value)
{
	if ((comp_mask & selector_mask) == 0 || (comp_mask & value_mask) == 0)
		return 0;

	switch (selector) {
	case IB_SA_GT:
		return (src_value <= dst_value);
	case IB_SA_LT:
		return (src_value >= dst_value);
	case IB_SA_EQ:
		return (src_value != dst_value);
	default:
		return 0;
	}
}
/*
 * Pack the SA attribute at @src into @dst using the field table that
 * matches @attr_id.  Only IB_SA_ATTR_PATH_REC is handled; any other
 * attribute id yields -EINVAL.  Returns 0 on success.
 */
int ib_sa_pack_attr(void *dst, void *src, int attr_id)
{
	if (attr_id != IB_SA_ATTR_PATH_REC)
		return -EINVAL;

	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
	return 0;
}
/*
 * Unpack the SA attribute at @src into @dst using the field table that
 * matches @attr_id.  Only IB_SA_ATTR_PATH_REC is handled; any other
 * attribute id yields -EINVAL.  Returns 0 on success.
 */
int ib_sa_unpack_attr(void *dst, void *src, int attr_id)
{
	if (attr_id != IB_SA_ATTR_PATH_REC)
		return -EINVAL;

	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), src, dst);
	return 0;
}
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@ -588,7 +458,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
port->sm_ah = NULL;
spin_unlock_irqrestore(&port->ah_lock, flags);
schedule_work(&sa_dev->port[event->element.port_num -
queue_work(ib_wq, &sa_dev->port[event->element.port_num -
sa_dev->start_port].update_task);
}
}
@ -685,6 +555,14 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->grh.hop_limit = rec->hop_limit;
ah_attr->grh.traffic_class = rec->traffic_class;
}
if (force_grh) {
memcpy(ah_attr->dmac, rec->dmac, 6);
ah_attr->vlan_id = rec->vlan_id;
} else {
memset(ah_attr->dmac, 0, 6);
ah_attr->vlan_id = 0xffff;
}
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@ -791,6 +669,10 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
mad->data, &rec);
rec.vlan_id = 0xffff;
memset(rec.dmac, 0, ETH_ALEN);
memset(rec.smac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
@ -801,7 +683,33 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}
int ib_sa_path_rec_query(struct ib_sa_client *client,
/**
* ib_sa_path_rec_get - Start a Path get query
* @client:SA client
* @device:device to send query on
* @port_num: port number to send query on
* @rec:Path Record to send in query
* @comp_mask:component mask to send in query
* @timeout_ms:time to wait for response
* @gfp_mask:GFP mask to use for internal allocations
* @callback:function called when query completes, times out or is
* canceled
* @context:opaque user context passed to callback
* @sa_query:query context, used to cancel query
*
* Send a Path Record Get query to the SA to look up a path. The
* callback function will be called when the query completes (or
* fails); status is 0 for a successful response, -EINTR if the query
* is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
* occurred sending the query. The resp parameter of the callback is
* only valid if status is 0.
*
* If the return value of ib_sa_path_rec_get() is negative, it is an
* error code. Otherwise it is a query ID that can be used to cancel
* the query.
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
@ -867,6 +775,7 @@ err1:
kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
int status,
@ -1082,26 +991,27 @@ err1:
return ret;
}
static void ib_sa_inform_callback(struct ib_sa_query *sa_query,
/* Support GuidInfoRecord */
static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
struct ib_sa_inform_query *query =
container_of(sa_query, struct ib_sa_inform_query, sa_query);
struct ib_sa_guidinfo_query *query =
container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
if (mad) {
struct ib_sa_inform rec;
struct ib_sa_guidinfo_rec rec;
ib_unpack(inform_table, ARRAY_SIZE(inform_table),
ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
mad->data, &rec);
query->callback(status, &rec, query->context);
} else
query->callback(status, NULL, query->context);
}
static void ib_sa_inform_release(struct ib_sa_query *sa_query)
static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query));
kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
}
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
@ -1115,52 +1025,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context,
struct ib_sa_query **sa_query)
{
// stub function -
// called originally from mad.c under mlx4_ib_init_sriov()
// which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c
// which goes down to this function
printk("ERROR: function should be called only in SRIOV flow!!!");
return 0;
}
/**
* ib_sa_informinfo_query - Start an InformInfo registration.
* @client:SA client
* @device:device to send query on
* @port_num: port number to send query on
* @rec:Inform record to send in query
* @timeout_ms:time to wait for response
* @gfp_mask:GFP mask to use for internal allocations
* @callback:function called when notice handler registration completes,
* times out or is canceled
* @context:opaque user context passed to callback
* @sa_query:query context, used to cancel query
*
* This function sends inform info to register with SA to receive
* in-service notice.
* The callback function will be called when the query completes (or
* fails); status is 0 for a successful response, -EINTR if the query
* is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
* occurred sending the query. The resp parameter of the callback is
* only valid if status is 0.
*
* If the return value of ib_sa_inform_query() is negative, it is an
* error code. Otherwise it is a query ID that can be used to cancel
* the query.
*/
int ib_sa_informinfo_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_inform *rec,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_inform *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
{
struct ib_sa_inform_query *query;
struct ib_sa_guidinfo_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
@ -1170,6 +1035,12 @@ int ib_sa_informinfo_query(struct ib_sa_client *client,
if (!sa_dev)
return -ENODEV;
if (method != IB_MGMT_METHOD_GET &&
method != IB_MGMT_METHOD_SET &&
method != IB_SA_METHOD_DELETE) {
return -EINVAL;
}
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
@ -1190,15 +1061,18 @@ int ib_sa_informinfo_query(struct ib_sa_client *client,
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
query->sa_query.callback = callback ? ib_sa_inform_callback : NULL;
query->sa_query.release = ib_sa_inform_release;
query->sa_query.port = port;
mad->mad_hdr.method = IB_MGMT_METHOD_SET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_INFORM_INFO);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
ib_pack(inform_table, ARRAY_SIZE(inform_table), rec, mad->data);
mad->mad_hdr.method = method;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
mad->sa_hdr.comp_mask = comp_mask;
ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
mad->data);
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
@ -1209,49 +1083,12 @@ err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
err1:
kfree(query);
return ret;
}
/*
 * Answer a received notice MAD with a REPORT_RESP.
 *
 * A send buffer is allocated on the port's notice agent, the received MAD
 * is copied into it and its method is rewritten to
 * IB_MGMT_METHOD_REPORT_RESP.  The port's sm_ah is referenced under ah_lock
 * and attached to the send buffer; if the port has no sm_ah the response is
 * dropped.  If posting fails, the reference and the buffer are released
 * here.
 */
static void ib_sa_notice_resp(struct ib_sa_port *port,
			      struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_mad_send_buf *send_buf;
	struct ib_sa_mad *resp;
	unsigned long irq_flags;

	send_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0,
				      IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
				      GFP_KERNEL);
	if (IS_ERR(send_buf))
		return;

	resp = send_buf->mad;
	memcpy(resp, mad_recv_wc->recv_buf.mad, sizeof *resp);
	resp->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP;

	spin_lock_irqsave(&port->ah_lock, irq_flags);
	if (port->sm_ah == NULL) {
		spin_unlock_irqrestore(&port->ah_lock, irq_flags);
		ib_free_send_mad(send_buf);
		return;
	}
	kref_get(&port->sm_ah->ref);
	send_buf->context[0] = &port->sm_ah->ref;
	send_buf->ah = port->sm_ah->ah;
	spin_unlock_irqrestore(&port->ah_lock, irq_flags);

	if (ib_post_send_mad(send_buf, NULL)) {
		/* Post failed: undo the sm_ah reference and free the buffer. */
		kref_put(send_buf->context[0], free_sm_ah);
		ib_free_send_mad(send_buf);
	}
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
@ -1306,36 +1143,9 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
static void notice_resp_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
kref_put(mad_send_wc->send_buf->context[0], free_sm_ah);
ib_free_send_mad(mad_send_wc->send_buf);
}
static void notice_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_port *port;
struct ib_sa_mad *mad;
struct ib_sa_notice notice;
port = mad_agent->context;
mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
ib_unpack(notice_table, ARRAY_SIZE(notice_table), mad->data, &notice);
if (!notice_dispatch(port->device, port->port_num, &notice))
ib_sa_notice_resp(port, mad_recv_wc);
ib_free_recv_mad(mad_recv_wc);
}
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
struct ib_mad_reg_req reg_req = {
.mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
.mgmt_class_version = 2
};
int s, e, i;
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
@ -1372,16 +1182,6 @@ static void ib_sa_add_one(struct ib_device *device)
if (IS_ERR(sa_dev->port[i].agent))
goto err;
sa_dev->port[i].device = device;
set_bit(IB_MGMT_METHOD_REPORT, reg_req.method_mask);
sa_dev->port[i].notice_agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
&reg_req, 0, notice_resp_handler,
notice_handler, &sa_dev->port[i]);
if (IS_ERR(sa_dev->port[i].notice_agent))
goto err;
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
}
@ -1396,7 +1196,7 @@ static void ib_sa_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
if (ib_register_event_handler(&sa_dev->event_handler))
goto err;
goto reg_err;
for (i = 0; i <= e - s; ++i)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
@ -1404,14 +1204,14 @@ static void ib_sa_add_one(struct ib_device *device)
return;
reg_err:
ib_set_client_data(device, &sa_client, NULL);
i = e - s;
err:
while (--i >= 0)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
if (!IS_ERR(sa_dev->port[i].notice_agent))
ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
if (!IS_ERR(sa_dev->port[i].agent))
for (; i >= 0; --i)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND &&
!IS_ERR(sa_dev->port[i].agent))
ib_unregister_mad_agent(sa_dev->port[i].agent);
}
kfree(sa_dev);
@ -1428,11 +1228,10 @@ static void ib_sa_remove_one(struct ib_device *device)
ib_unregister_event_handler(&sa_dev->event_handler);
flush_scheduled_work();
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) {
ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
@ -1447,9 +1246,6 @@ static int __init ib_sa_init(void)
{
int ret;
spin_lock_init(&idr_lock);
spin_lock_init(&tid_lock);
get_random_bytes(&tid, sizeof tid);
ret = ib_register_client(&sa_client);
@ -1464,23 +1260,7 @@ static int __init ib_sa_init(void)
goto err2;
}
ret = notice_init();
if (ret) {
printk(KERN_ERR "Couldn't initialize notice handling\n");
goto err3;
}
ret = sa_db_init();
if (ret) {
printk(KERN_ERR "Couldn't initialize local SA\n");
goto err4;
}
return 0;
err4:
notice_cleanup();
err3:
mcast_cleanup();
err2:
ib_unregister_client(&sa_client);
err1:
@ -1489,9 +1269,7 @@ err1:
static void __exit ib_sa_cleanup(void)
{
sa_db_cleanup();
mcast_cleanup();
notice_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}

View File

@ -52,6 +52,10 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
/* C14-6 -- valid hop_cnt values are from 0 to 63 */
if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
return IB_SMI_DISCARD;
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 */
if (hop_cnt && hop_ptr == 0) {
@ -133,6 +137,10 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
hop_cnt = smp->hop_cnt;
/* See section 14.2.2.2, Vol 1 IB spec */
/* C14-6 -- valid hop_cnt values are from 0 to 63 */
if (hop_cnt >= IB_SMP_MAX_PATH_HOPS)
return IB_SMI_DISCARD;
if (!ib_get_smp_direction(smp)) {
/* C14-9:1 -- sender should have incremented hop_ptr */
if (hop_cnt && hop_ptr == 0)

View File

@ -37,6 +37,7 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/printk.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
@ -105,7 +106,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
attr.state < ARRAY_SIZE(state_name) ?
attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
@ -180,19 +181,18 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
{
struct ib_port_attr attr;
char *speed = "";
int rate;
int rate; /* in deci-Gb/sec */
ssize_t ret;
ret = ib_query_port(p->ibdev, p->port_num, &attr);
if (ret)
return ret;
switch (attr.active_speed) {
case 2: speed = " DDR"; break;
case 4: speed = " QDR"; break;
}
ib_active_speed_enum_to_rate(attr.active_speed,
&rate,
&speed);
rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
rate *= ib_width_enum_to_int(attr.active_width);
if (rate < 0)
return -EINVAL;
@ -229,9 +229,11 @@ static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
{
switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
return sprintf(buf, "%s\n", "IB");
return sprintf(buf, "%s\n", "InfiniBand");
case IB_LINK_LAYER_ETHERNET:
return sprintf(buf, "%s\n", "Ethernet");
case IB_LINK_LAYER_SCIF:
return sprintf(buf, "%s\n", "SCIF");
default:
return sprintf(buf, "%s\n", "Unknown");
}
@ -267,16 +269,12 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
container_of(attr, struct port_table_attribute, attr);
union ib_gid gid;
ssize_t ret;
u16 *raw;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
if (ret)
return ret;
raw = (u16 *)gid.raw;
return sprintf(buf, "%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x:%.4x\n",
htons(raw[0]), htons(raw[1]), htons(raw[2]), htons(raw[3]),
htons(raw[4]), htons(raw[5]), htons(raw[6]), htons(raw[7]));
return sprintf(buf, GID_PRINT_FMT"\n",GID_PRINT_ARGS(gid.raw));
}
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
@ -351,8 +349,8 @@ static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr,
be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
break;
case 64:
ret = sprintf(buf, "%llu\n", (unsigned long long)
be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
ret = sprintf(buf, "%llu\n",
(unsigned long long)be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
break;
default:
ret = 0;
@ -536,6 +534,7 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
element->attr.attr.mode = S_IRUGO;
element->attr.show = show;
element->index = i;
sysfs_attr_init(&element->attr.attr);
tab_attr[i] = &element->attr.attr;
}
@ -570,7 +569,7 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
kobject_get(device->ports_parent),
device->ports_parent,
"%d", port_num);
if (ret)
goto err_put;
@ -609,7 +608,6 @@ static int add_port(struct ib_device *device, int port_num,
}
list_add_tail(&p->kobj.entry, &device->port_list);
#ifdef __linux__
kobject_uevent(&p->kobj, KOBJ_ADD);
#endif
@ -655,6 +653,7 @@ static ssize_t show_node_type(struct device *device,
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
case RDMA_NODE_MIC: return sprintf(buf, "%d: MIC\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
}
}
@ -716,16 +715,75 @@ static ssize_t set_node_desc(struct device *device,
return count;
}
/* Read handler for the "cmd_perf" attribute: prints the device's cmd_perf value in decimal. */
static ssize_t show_cmd_perf(struct device *device,
			     struct device_attribute *attr, char *buf)
{
	struct ib_device *ibdev;

	ibdev = container_of(device, struct ib_device, dev);
	return sprintf(buf, "%d\n", ibdev->cmd_perf);
}
/*
 * Write handler for the "cmd_perf" attribute.  The input must be a
 * hexadecimal number with a literal "0x" prefix; anything else yields
 * -EINVAL.  On success the parsed value is stored and @count is returned.
 */
static ssize_t set_cmd_perf(struct device *device,
			    struct device_attribute *attr,
			    const char *buf, size_t count)
{
	struct ib_device *ibdev = container_of(device, struct ib_device, dev);
	u32 parsed;

	if (sscanf(buf, "0x%x", &parsed) == 1) {
		ibdev->cmd_perf = parsed;
		return count;
	}

	return -EINVAL;
}
/* Read handler for the "cmd_avg" attribute: prints the device's cmd_avg value as an unsigned decimal. */
static ssize_t show_cmd_avg(struct device *device,
			    struct device_attribute *attr, char *buf)
{
	struct ib_device *ibdev = container_of(device, struct ib_device, dev);
	unsigned long long avg = (unsigned long long)ibdev->cmd_avg;

	return sprintf(buf, "%llu\n", avg);
}
/*
 * Write handler for the "cmd_avg" attribute.  The written data is ignored;
 * any write resets both cmd_avg and cmd_n to zero under cmd_perf_lock.
 * Always returns @count.
 */
static ssize_t set_cmd_avg(struct device *device,
			   struct device_attribute *attr,
			   const char *buf, size_t count)
{
	struct ib_device *ibdev;

	ibdev = container_of(device, struct ib_device, dev);

	spin_lock(&ibdev->cmd_perf_lock);
	ibdev->cmd_n = 0;
	ibdev->cmd_avg = 0;
	spin_unlock(&ibdev->cmd_perf_lock);

	return count;
}
/* Read handler for the "cmd_n" attribute: prints the device's cmd_n value in decimal. */
static ssize_t show_cmd_n(struct device *device,
			  struct device_attribute *attr, char *buf)
{
	struct ib_device *ibdev;

	ibdev = container_of(device, struct ib_device, dev);
	return sprintf(buf, "%d\n", ibdev->cmd_n);
}
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
static DEVICE_ATTR(cmd_perf, S_IRUGO | S_IWUSR, show_cmd_perf, set_cmd_perf);
static DEVICE_ATTR(cmd_avg, S_IRUGO | S_IWUSR, show_cmd_avg, set_cmd_avg);
static DEVICE_ATTR(cmd_n, S_IRUGO, show_cmd_n, NULL);
static struct device_attribute *ib_class_attributes[] = {
&dev_attr_node_type,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
&dev_attr_node_desc
&dev_attr_node_desc,
&dev_attr_cmd_perf,
&dev_attr_cmd_avg,
&dev_attr_cmd_n,
};
static struct class ib_class = {
@ -851,7 +909,8 @@ static struct attribute_group iw_stats_group = {
};
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *, u8, struct kobject *))
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
struct device *class_dev = &device->dev;
int ret;
@ -874,8 +933,7 @@ int ib_device_register_sysfs(struct ib_device *device,
goto err_unregister;
}
device->ports_parent = kobject_create_and_add("ports",
kobject_get(&class_dev->kobj));
device->ports_parent = kobject_create_and_add("ports",&class_dev->kobj);
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
@ -919,6 +977,11 @@ err_put:
kobject_put(&class_dev->kobj);
err_unregister:
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
device_remove_file(class_dev, ib_class_attributes[i]);
}
device_unregister(class_dev);
err:
@ -927,15 +990,16 @@ err:
void ib_device_unregister_sysfs(struct ib_device *device)
{
int i;
struct kobject *p, *t;
struct ib_port *port;
int i;
struct device *class_dev = &device->dev;
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
device_remove_file(&device->dev, ib_class_attributes[i]);
device_remove_file(class_dev, ib_class_attributes[i]);
}
list_for_each_entry_safe(p, t, &device->port_list, entry) {
@ -960,22 +1024,3 @@ void ib_sysfs_cleanup(void)
{
class_unregister(&ib_class);
}
/*int ib_sysfs_create_port_files(struct ib_device *device,
int (*create)(struct ib_device *dev, u8 port_num,
struct kobject *kobj))
{
struct kobject *p;
struct ib_port *port;
int ret = 0;
list_for_each_entry(p, &device->port_list, entry) {
port = container_of(p, struct ib_port, kobj);
ret = create(device, port->port_num, &port->kobj);
if (ret)
break;
}
return ret;
}
EXPORT_SYMBOL(ib_sysfs_create_port_files);*/

View File

@ -37,10 +37,12 @@
#include <linux/device.h>
#include <linux/err.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <asm/uaccess.h>
@ -396,7 +398,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
struct ib_ucm_event_get cmd;
struct ib_ucm_event *uevent;
int result = 0;
DEFINE_WAIT(wait);
if (out_len < sizeof(struct ib_ucm_event_resp))
return -ENOSPC;
@ -1123,7 +1124,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
return -EINVAL;
if (hdr.in + sizeof(hdr) > len)
@ -1163,7 +1164,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
{
struct ib_ucm_file *file;
file = kzalloc(sizeof(*file), GFP_KERNEL);
file = kmalloc(sizeof(*file), GFP_KERNEL);
if (!file)
return -ENOMEM;
@ -1177,7 +1178,7 @@ static int ib_ucm_open(struct inode *inode, struct file *filp)
file->filp = filp;
file->device = container_of(inode->i_cdev->si_drv1, struct ib_ucm_device, cdev);
return 0;
return nonseekable_open(inode, filp);
}
static int ib_ucm_close(struct inode *inode, struct file *filp)
@ -1212,7 +1213,10 @@ static void ib_ucm_release_dev(struct device *dev)
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
cdev_del(&ucm_dev->cdev);
if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
clear_bit(ucm_dev->devnum, dev_map);
else
clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map);
kfree(ucm_dev);
}
@ -1222,6 +1226,7 @@ static const struct file_operations ucm_fops = {
.release = ib_ucm_close,
.write = ib_ucm_write,
.poll = ib_ucm_poll,
.llseek = no_llseek,
};
static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
@ -1234,8 +1239,32 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
static dev_t overflow_maj;
static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
/*
 * Pick a free slot in the overflow device-number range.
 *
 * On first use (overflow_maj still zero) a character-device region of
 * IB_UCM_MAX_DEVICES numbers is allocated; if that fails the error is
 * logged and returned.  Afterwards the first clear bit in overflow_map is
 * returned, or -1 when every slot is taken.  The bit is not set here.
 */
static int find_overflow_devnum(void)
{
	int slot;

	if (overflow_maj == 0) {
		int rc = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
					     "infiniband_cm");

		if (rc != 0) {
			printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
			return rc;
		}
	}

	slot = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
	return (slot < IB_UCM_MAX_DEVICES) ? slot : -1;
}
static void ib_ucm_add_one(struct ib_device *device)
{
int devnum;
dev_t base;
struct ib_ucm_device *ucm_dev;
if (!device->alloc_ucontext ||
@ -1248,16 +1277,25 @@ static void ib_ucm_add_one(struct ib_device *device)
ucm_dev->ib_dev = device;
ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
if (devnum >= IB_UCM_MAX_DEVICES) {
devnum = find_overflow_devnum();
if (devnum < 0)
goto err;
set_bit(ucm_dev->devnum, dev_map);
ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
base = devnum + overflow_maj;
set_bit(devnum, overflow_map);
} else {
ucm_dev->devnum = devnum;
base = devnum + IB_UCM_BASE_DEV;
set_bit(devnum, dev_map);
}
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
if (cdev_add(&ucm_dev->cdev, base, 1))
goto err;
ucm_dev->dev.class = &cm_class;
@ -1278,7 +1316,10 @@ err_dev:
device_unregister(&ucm_dev->dev);
err_cdev:
cdev_del(&ucm_dev->cdev);
clear_bit(ucm_dev->devnum, dev_map);
if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
clear_bit(devnum, dev_map);
else
clear_bit(devnum, overflow_map);
err:
kfree(ucm_dev);
return;
@ -1298,6 +1339,7 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att
{
return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static int __init ib_ucm_init(void)
@ -1337,6 +1379,8 @@ static void __exit ib_ucm_cleanup(void)
ib_unregister_client(&ucm_client);
class_remove_file(&cm_class, &class_attr_abi_version);
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
idr_destroy(&ctx_id_table);
}

View File

@ -34,10 +34,13 @@
#include <linux/file.h>
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
@ -48,9 +51,7 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
MODULE_LICENSE("Dual BSD/GPL");
enum {
UCMA_MAX_BACKLOG = 1024
};
static unsigned int max_backlog = 1024;
struct ucma_file {
struct mutex mut;
@ -253,17 +254,17 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
if (!uevent)
return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
mutex_lock(&ctx->file->mut);
uevent->cm_id = cm_id;
ucma_set_event_context(ctx, event, uevent);
uevent->resp.event = event->event;
uevent->resp.status = event->status;
if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
if (cm_id->qp_type == IB_QPT_UD)
ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
else
ucma_copy_conn_event(&uevent->resp.param.conn,
&event->param.conn);
mutex_lock(&ctx->file->mut);
if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
if (!ctx->backlog) {
ret = -ENOMEM;
@ -298,7 +299,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
int ret = 0;
DEFINE_WAIT(wait);
if (out_len < sizeof uevent->resp)
return -ENOSPC;
@ -332,6 +332,7 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
ctx->cm_id = uevent->cm_id;
ctx->cm_id->context = ctx;
uevent->resp.id = ctx->id;
ctx->cm_id->ucontext = ctx;
}
if (copy_to_user((void __user *)(unsigned long)cmd.response,
@ -350,13 +351,31 @@ done:
return ret;
}
static ssize_t ucma_create_id(struct ucma_file *file,
const char __user *inbuf,
/*
 * Derive the QP type for a new cm_id from the port space requested by
 * userspace.
 *
 * @cmd:     create-id command copied in from the user
 * @qp_type: output; written only when 0 is returned
 *
 * TCP maps to RC, UDP and IPoIB map to UD, and for the native IB port
 * space the QP type supplied in the command is passed through as-is.
 * Any other port space yields -EINVAL and leaves *qp_type untouched.
 */
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
{
	if (cmd->ps == RDMA_PS_TCP)
		*qp_type = IB_QPT_RC;
	else if (cmd->ps == RDMA_PS_UDP || cmd->ps == RDMA_PS_IPOIB)
		*qp_type = IB_QPT_UD;
	else if (cmd->ps == RDMA_PS_IB)
		*qp_type = cmd->qp_type;
	else
		return -EINVAL;

	return 0;
}
static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_create_id cmd;
struct rdma_ucm_create_id_resp resp;
struct ucma_context *ctx;
enum ib_qp_type qp_type;
int ret;
if (out_len < sizeof(resp))
@ -365,6 +384,10 @@ static ssize_t ucma_create_id(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
ret = ucma_get_qp_type(&cmd, &qp_type);
if (ret)
return ret;
mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
mutex_unlock(&file->mut);
@ -372,11 +395,12 @@ static ssize_t ucma_create_id(struct ucma_file *file,
return -ENOMEM;
ctx->uid = cmd.uid;
ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps);
ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(ctx->cm_id)) {
ret = PTR_ERR(ctx->cm_id);
goto err1;
}
ctx->cm_id->ucontext = ctx;
resp.id = ctx->id;
if (copy_to_user((void __user *)(unsigned long)cmd.response,
@ -409,24 +433,6 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
mutex_unlock(&mut);
}
static void ucma_cleanup_events(struct ucma_context *ctx)
{
struct ucma_event *uevent, *tmp;
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
if (uevent->ctx != ctx)
continue;
list_del(&uevent->list);
/* clear incoming connections. */
if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
rdma_destroy_id(uevent->cm_id);
kfree(uevent);
}
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
@ -440,9 +446,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
}
}
/*
* We cannot hold file->mut when calling rdma_destroy_id() or we can
* deadlock. We also acquire file->mut in ucma_event_handler(), and
* rdma_destroy_id() will wait until all callbacks have completed.
*/
static int ucma_free_ctx(struct ucma_context *ctx)
{
int events_reported;
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
@ -451,10 +464,20 @@ static int ucma_free_ctx(struct ucma_context *ctx)
/* Cleanup events not yet reported to the user. */
mutex_lock(&ctx->file->mut);
ucma_cleanup_events(ctx);
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
if (uevent->ctx == ctx)
list_move_tail(&uevent->list, &list);
}
list_del(&ctx->list);
mutex_unlock(&ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &list, list) {
list_del(&uevent->list);
if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
rdma_destroy_id(uevent->cm_id);
kfree(uevent);
}
events_reported = ctx->events_reported;
kfree(ctx);
return events_reported;
@ -586,24 +609,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
struct rdma_dev_addr *dev_addr;
struct net_device *dev;
u16 vid = 0;
resp->num_paths = route->num_paths;
switch (route->num_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
if (dev) {
vid = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
}
iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
dev_addr->dst_dev_addr, vid);
iboe_addr_get_sgid(dev_addr,
(union ib_gid *) &resp->ib_route[0].sgid);
rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
(union ib_gid *)&resp->ib_route[0].dgid);
rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
(union ib_gid *)&resp->ib_route[0].sgid);
resp->ib_route[0].pkey = cpu_to_be16(0xffff);
break;
case 2:
@ -619,6 +632,16 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
}
}
/*
 * Fill the first route entry of a query-route response for an iWARP
 * device: the destination and source GIDs are taken from the device
 * address of @route.  No other field of @resp is written here.
 */
static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp,
			       struct rdma_route *route)
{
	struct rdma_dev_addr *addr = &route->addr.dev_addr;
	union ib_gid *dgid = (union ib_gid *) &resp->ib_route[0].dgid;
	union ib_gid *sgid = (union ib_gid *) &resp->ib_route[0].sgid;

	rdma_addr_get_dgid(addr, dgid);
	rdma_addr_get_sgid(addr, sgid);
}
static ssize_t ucma_query_route(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@ -653,8 +676,10 @@ static ssize_t ucma_query_route(struct ucma_file *file,
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
resp.port_num = ctx->cm_id->port_num;
if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) {
switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) {
switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) {
case RDMA_TRANSPORT_IB:
switch (rdma_port_get_link_layer(ctx->cm_id->device,
ctx->cm_id->port_num)) {
case IB_LINK_LAYER_INFINIBAND:
ucma_copy_ib_route(&resp, &ctx->cm_id->route);
break;
@ -664,6 +689,12 @@ static ssize_t ucma_query_route(struct ucma_file *file,
default:
break;
}
break;
case RDMA_TRANSPORT_IWARP:
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
break;
default:
break;
}
out:
@ -727,8 +758,8 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ctx->backlog = cmd.backlog > 0 && cmd.backlog < UCMA_MAX_BACKLOG ?
cmd.backlog : UCMA_MAX_BACKLOG;
ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
cmd.backlog : max_backlog;
ret = rdma_listen(ctx->cm_id, ctx->backlog);
ucma_put_ctx(ctx);
return ret;
@ -750,9 +781,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
return PTR_ERR(ctx);
if (cmd.conn_param.valid) {
ctx->uid = cmd.uid;
ucma_copy_conn_param(&conn_param, &cmd.conn_param);
mutex_lock(&file->mut);
ret = rdma_accept(ctx->cm_id, &conn_param);
if (!ret)
ctx->uid = cmd.uid;
mutex_unlock(&file->mut);
} else
ret = rdma_accept(ctx->cm_id, NULL);
@ -848,6 +882,20 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
}
rdma_set_service_type(ctx->cm_id, *((u8 *) optval));
break;
case RDMA_OPTION_ID_REUSEADDR:
if (optlen != sizeof(int)) {
ret = -EINVAL;
break;
}
ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
break;
case RDMA_OPTION_ID_AFONLY:
if (optlen != sizeof(int)) {
ret = -EINVAL;
break;
}
ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
break;
default:
ret = -ENOSYS;
}
@ -887,12 +935,22 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
void *optval, size_t optlen)
{
int ret;
int ret = 0;
switch (optname) {
case RDMA_OPTION_IB_PATH:
ret = ucma_set_ib_path(ctx, optval, optlen);
break;
case RDMA_OPTION_IB_APM:
if (optlen != sizeof(u8)) {
ret = -EINVAL;
break;
}
if (*(u8 *)optval)
ret = rdma_enable_apm(ctx->cm_id, RDMA_ALT_PATH_BEST);
break;
default:
ret = -ENOSYS;
}
@ -937,20 +995,21 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
optval = kmalloc(cmd.optlen, GFP_KERNEL);
if (!optval) {
ret = -ENOMEM;
goto out1;
goto err_ucma_put_ctx;
}
if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
if (copy_from_user(optval, (void __user *)(unsigned long)cmd.optval,
cmd.optlen)) {
ret = -EFAULT;
goto out2;
goto err_kfree;
}
ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
cmd.optlen);
out2:
err_kfree:
kfree(optval);
out1:
err_ucma_put_ctx:
ucma_put_ctx(ctx);
return ret;
}
@ -1121,7 +1180,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
struct ucma_context *ctx;
struct file *filp;
struct fd f;
struct ucma_file *cur_file;
int ret = 0;
@ -1129,12 +1188,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
return -EFAULT;
/* Get current fd to protect against it being closed */
filp = fget(cmd.fd);
if (!filp)
f = fdget(cmd.fd);
if (!f.file)
return -ENOENT;
/* Validate current fd and prevent destruction of id. */
ctx = ucma_get_ctx(filp->private_data, cmd.id);
ctx = ucma_get_ctx(f.file->private_data, cmd.id);
if (IS_ERR(ctx)) {
ret = PTR_ERR(ctx);
goto file_put;
@ -1168,7 +1227,7 @@ response:
ucma_put_ctx(ctx);
file_put:
fput(filp);
fdput(f);
return ret;
}
@ -1209,7 +1268,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
return -EINVAL;
if (hdr.in + sizeof(hdr) > len)
@ -1261,7 +1320,8 @@ static int ucma_open(struct inode *inode, struct file *filp)
filp->private_data = file;
file->filp = filp;
return 0;
return nonseekable_open(inode, filp);
}
static int ucma_close(struct inode *inode, struct file *filp)
@ -1291,11 +1351,14 @@ static const struct file_operations ucma_fops = {
.release = ucma_close,
.write = ucma_write,
.poll = ucma_poll,
.llseek = no_llseek,
};
static struct miscdevice ucma_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "rdma_cm",
.nodename = "infiniband/rdma_cm",
.mode = 0666,
.fops = &ucma_fops,
};
@ -1318,10 +1381,11 @@ static int __init ucma_init(void)
ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
if (ret) {
printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
goto err;
goto err1;
}
return 0;
err:
err1:
misc_deregister(&ucma_misc);
return ret;
}

View File

@ -33,6 +33,7 @@
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/if_ether.h>
#include <rdma/ib_pack.h>
@ -230,32 +231,28 @@ void ib_ud_header_init(int payload_bytes,
int immediate_present,
struct ib_ud_header *header)
{
u16 packet_length = 0;
memset(header, 0, sizeof *header);
if (lrh_present) {
u16 packet_length = 0;
header->lrh.link_version = 0;
header->lrh.link_next_header =
grh_present ? IB_LNH_IBA_GLOBAL : IB_LNH_IBA_LOCAL;
packet_length = IB_LRH_BYTES;
packet_length = (IB_LRH_BYTES +
IB_BTH_BYTES +
IB_DETH_BYTES +
(grh_present ? IB_GRH_BYTES : 0) +
payload_bytes +
4 + /* ICRC */
3) / 4; /* round up */
header->lrh.packet_length = cpu_to_be16(packet_length);
}
if (eth_present) {
if (vlan_present) {
if (vlan_present)
header->eth.type = cpu_to_be16(ETH_P_8021Q);
packet_length += IB_VLAN_BYTES;
}
packet_length += IB_ETH_BYTES;
}
packet_length += IB_BTH_BYTES + IB_DETH_BYTES + payload_bytes +
4 + /* ICRC */
3; /* round up */
packet_length /= 4;
if (grh_present) {
packet_length += IB_GRH_BYTES / 4;
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
@ -266,9 +263,6 @@ void ib_ud_header_init(int payload_bytes,
header->grh.next_header = 0x1b;
}
if (lrh_present)
header->lrh.packet_length = cpu_to_be16(packet_length);
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
else
@ -284,36 +278,6 @@ void ib_ud_header_init(int payload_bytes,
}
EXPORT_SYMBOL(ib_ud_header_init);
/**
* ib_lrh_header_pack - Pack LRH header struct into wire format
* @lrh:unpacked LRH header struct
* @buf:Buffer to pack into
*
* ib_lrh_header_pack() packs the LRH header structure @lrh into
* wire format in the buffer @buf.
*/
int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf)
{
ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf);
return 0;
}
EXPORT_SYMBOL(ib_lrh_header_pack);
/**
* ib_lrh_header_unpack - Unpack LRH structure from wire format
* @lrh:unpacked LRH header struct
* @buf:Buffer to pack into
*
* ib_lrh_header_unpack() unpacks the LRH header structure from
* wire format (in buf) into @lrh.
*/
int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh)
{
ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh);
return 0;
}
EXPORT_SYMBOL(ib_lrh_header_unpack);
/**
* ib_ud_header_pack - Pack UD header struct into wire format
* @header:UD header struct
@ -337,14 +301,11 @@ int ib_ud_header_pack(struct ib_ud_header *header,
&header->eth, buf + len);
len += IB_ETH_BYTES;
}
if (header->vlan_present) {
ib_pack(vlan_table, ARRAY_SIZE(vlan_table),
&header->vlan, buf + len);
len += IB_VLAN_BYTES;
}
if (header->grh_present) {
ib_pack(grh_table, ARRAY_SIZE(grh_table),
&header->grh, buf + len);

View File

@ -35,109 +35,168 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#ifdef __linux__
#include <linux/hugetlb.h>
#endif
#include <linux/dma-attrs.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <sys/priv.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <vm/vm.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include "uverbs.h"
#define IB_UMEM_MAX_PAGE_CHUNK (PAGE_SIZE / sizeof (struct page *))
static int allow_weak_ordering;
module_param(allow_weak_ordering, bool, 0444);
MODULE_PARM_DESC(allow_weak_ordering, "Allow weak ordering for data registered memory");
module_param_named(weak_ordering, allow_weak_ordering, int, 0444);
MODULE_PARM_DESC(weak_ordering, "Allow weak ordering for data registered memory");
#define IB_UMEM_MAX_PAGE_CHUNK \
((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \
((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \
(void *) &((struct ib_umem_chunk *) 0)->page_list[0]))
#ifdef __ia64__
extern int dma_map_sg_hp_wa;
static int dma_map_sg_ia64(struct ib_device *ibdev,
struct scatterlist *sg,
int nents,
enum dma_data_direction dir)
static struct ib_umem *peer_umem_get(struct ib_peer_memory_client *ib_peer_mem,
struct ib_umem *umem, unsigned long addr,
int dmasync, int invalidation_supported)
{
int i, rc, j, lents = 0;
struct device *dev;
int ret;
const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
struct invalidation_ctx *invalidation_ctx = NULL;
if (!dma_map_sg_hp_wa)
return ib_dma_map_sg(ibdev, sg, nents, dir);
dev = ibdev->dma_device;
for (i = 0; i < nents; ++i) {
rc = dma_map_sg(dev, sg + i, 1, dir);
if (rc <= 0) {
for (j = 0; j < i; ++j)
dma_unmap_sg(dev, sg + j, 1, dir);
return 0;
umem->ib_peer_mem = ib_peer_mem;
if (invalidation_supported) {
invalidation_ctx = kzalloc(sizeof(*invalidation_ctx), GFP_KERNEL);
if (!invalidation_ctx) {
ret = -ENOMEM;
goto out;
}
lents += rc;
umem->invalidation_ctx = invalidation_ctx;
invalidation_ctx->umem = umem;
mutex_lock(&ib_peer_mem->lock);
invalidation_ctx->context_ticket =
ib_peer_insert_context(ib_peer_mem, invalidation_ctx);
/* unlock before calling get pages to prevent a dead-lock from the callback */
mutex_unlock(&ib_peer_mem->lock);
}
return lents;
ret = peer_mem->get_pages(addr, umem->length, umem->writable, 1,
&umem->sg_head,
umem->peer_mem_client_context,
invalidation_ctx ?
(void *)invalidation_ctx->context_ticket : NULL);
if (invalidation_ctx) {
/* taking the lock back, checking that wasn't invalidated at that time */
mutex_lock(&ib_peer_mem->lock);
if (invalidation_ctx->peer_invalidated) {
printk(KERN_ERR "peer_umem_get: pages were invalidated by peer\n");
ret = -EINVAL;
}
}
if (ret)
goto out;
umem->page_size = peer_mem->get_page_size
(umem->peer_mem_client_context);
if (umem->page_size <= 0)
goto put_pages;
umem->offset = addr & ((unsigned long)umem->page_size - 1);
ret = peer_mem->dma_map(&umem->sg_head,
umem->peer_mem_client_context,
umem->context->device->dma_device,
dmasync,
&umem->nmap);
if (ret)
goto put_pages;
ib_peer_mem->stats.num_reg_pages +=
umem->nmap * (umem->page_size >> PAGE_SHIFT);
ib_peer_mem->stats.num_alloc_mrs += 1;
return umem;
put_pages:
peer_mem->put_pages(umem->peer_mem_client_context,
&umem->sg_head);
out:
if (invalidation_ctx) {
ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
mutex_unlock(&umem->ib_peer_mem->lock);
kfree(invalidation_ctx);
}
ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context,
umem->peer_mem_srcu_key);
kfree(umem);
return ERR_PTR(ret);
}
static void dma_unmap_sg_ia64(struct ib_device *ibdev,
struct scatterlist *sg,
int nents,
enum dma_data_direction dir)
static void peer_umem_release(struct ib_umem *umem)
{
int i;
struct device *dev;
struct ib_peer_memory_client *ib_peer_mem = umem->ib_peer_mem;
const struct peer_memory_client *peer_mem = ib_peer_mem->peer_mem;
struct invalidation_ctx *invalidation_ctx = umem->invalidation_ctx;
if (!dma_map_sg_hp_wa)
return ib_dma_unmap_sg(ibdev, sg, nents, dir);
if (invalidation_ctx) {
int peer_callback;
int inflight_invalidation;
/* If we are not under peer callback we must take the lock before removing
* core ticket from the tree and releasing its umem.
* It will let any inflight callbacks to be ended safely.
* If we are under peer callback or under error flow of reg_mr so that context
* wasn't activated yet lock was already taken.
*/
if (invalidation_ctx->func && !invalidation_ctx->peer_callback)
mutex_lock(&ib_peer_mem->lock);
ib_peer_remove_context(ib_peer_mem, invalidation_ctx->context_ticket);
/* make sure to check inflight flag after took the lock and remove from tree.
* in addition, from that point using local variables for peer_callback and
* inflight_invalidation as after the complete invalidation_ctx can't be accessed
* any more as it may be freed by the callback.
*/
peer_callback = invalidation_ctx->peer_callback;
inflight_invalidation = invalidation_ctx->inflight_invalidation;
if (inflight_invalidation)
complete(&invalidation_ctx->comp);
/* On peer callback lock is handled externally */
if (!peer_callback)
/* unlocking before put_pages */
mutex_unlock(&ib_peer_mem->lock);
/* in case under callback context or callback is pending let it free the invalidation context */
if (!peer_callback && !inflight_invalidation)
kfree(invalidation_ctx);
}
peer_mem->dma_unmap(&umem->sg_head,
umem->peer_mem_client_context,
umem->context->device->dma_device);
peer_mem->put_pages(&umem->sg_head,
umem->peer_mem_client_context);
ib_peer_mem->stats.num_dereg_pages +=
umem->nmap * (umem->page_size >> PAGE_SHIFT);
ib_peer_mem->stats.num_dealloc_mrs += 1;
ib_put_peer_client(ib_peer_mem, umem->peer_mem_client_context,
umem->peer_mem_srcu_key);
kfree(umem);
return;
dev = ibdev->dma_device;
for (i = 0; i < nents; ++i)
dma_unmap_sg(dev, sg + i, 1, dir);
}
#define ib_dma_map_sg(dev, sg, nents, dir) dma_map_sg_ia64(dev, sg, nents, dir)
#define ib_dma_unmap_sg(dev, sg, nents, dir) dma_unmap_sg_ia64(dev, sg, nents, dir)
#endif
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
#ifdef __linux__
struct ib_umem_chunk *chunk, *tmp;
int i;
list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
ib_dma_unmap_sg_attrs(dev, chunk->page_list,
chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
for (i = 0; i < chunk->nents; ++i) {
struct page *page = sg_page(&chunk->page_list[i]);
if (umem->writable && dirty)
set_page_dirty_lock(page);
put_page(page);
}
kfree(chunk);
}
#else
struct ib_umem_chunk *chunk, *tmp;
vm_object_t object;
struct scatterlist *sg;
struct page *page;
int i;
object = NULL;
list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
ib_dma_unmap_sg_attrs(dev, chunk->page_list,
chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
for (i = 0; i < chunk->nents; ++i) {
struct page *page = sg_page(&chunk->page_list[i]);
if (umem->nmap > 0)
ib_dma_unmap_sg(dev, umem->sg_head.sgl,
umem->nmap,
DMA_BIDIRECTIONAL);
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
page = sg_page(sg);
if (umem->writable && dirty) {
if (object && object != page->object)
VM_OBJECT_WUNLOCK(object);
@ -148,14 +207,26 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
vm_page_dirty(page);
}
}
kfree(chunk);
}
sg_free_table(&umem->sg_head);
if (object)
VM_OBJECT_WUNLOCK(object);
#endif
}
/*
 * ib_umem_activate_invalidation_notifier - arm the invalidation callback
 * @umem: umem whose invalidation context is to be armed; its
 *        invalidation_ctx is dereferenced unconditionally
 * @func: callback stored in the invalidation context
 * @cookie: opaque argument stored alongside @func
 *
 * Stores the callback and then drops umem->ib_peer_mem->lock.  The lock
 * is not acquired here: peer_umem_get() leaves it held on its success
 * path when an invalidation context was created.
 */
void ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
					    umem_invalidate_func_t func,
					    void *cookie)
{
	struct invalidation_ctx *ctx = umem->invalidation_ctx;

	ctx->func = func;
	ctx->cookie = cookie;

	/* from that point any pending invalidations can be called */
	mutex_unlock(&umem->ib_peer_mem->lock);
}
EXPORT_SYMBOL(ib_umem_activate_invalidation_notifier);
/**
* ib_umem_get - Pin and DMA map userspace memory.
* @context: userspace context to pin memory for
@ -164,163 +235,23 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
* @access: IB_ACCESS_xxx flags for memory being pinned
* @dmasync: flush in-flight DMA when the memory region is written
*/
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync)
struct ib_umem *ib_umem_get_ex(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync,
int invalidation_supported)
{
#ifdef __linux__
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
struct ib_umem_chunk *chunk;
unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
int ret;
int off;
int i;
DEFINE_DMA_ATTRS(attrs);
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
else if (allow_weak_ordering)
dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);
if (!can_do_mlock())
return ERR_PTR(-EPERM);
umem = kmalloc(sizeof *umem, GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
umem->context = context;
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
/* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1;
INIT_LIST_HEAD(&umem->chunk_list);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
kfree(umem);
return ERR_PTR(-ENOMEM);
}
/*
* if we can't alloc the vma_list, it's not so bad;
* just assume the memory is not hugetlb memory
*/
vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
if (!vma_list)
umem->hugetlb = 0;
npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem);
locked = npages + current->mm->locked_vm;
lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
ret = -ENOMEM;
goto out;
}
cur_base = addr & PAGE_MASK;
ret = 0;
while (npages) {
ret = get_user_pages(current, current->mm, cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
1, !umem->writable, page_list, vma_list);
if (ret < 0)
goto out;
cur_base += ret * PAGE_SIZE;
npages -= ret;
off = 0;
while (ret) {
chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
GFP_KERNEL);
if (!chunk) {
ret = -ENOMEM;
goto out;
}
chunk->attrs = attrs;
chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
sg_init_table(chunk->page_list, chunk->nents);
for (i = 0; i < chunk->nents; ++i) {
if (vma_list &&
!is_vm_hugetlb_page(vma_list[i + off]))
umem->hugetlb = 0;
sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
}
chunk->nmap = ib_dma_map_sg_attrs(context->device,
&chunk->page_list[0],
chunk->nents,
DMA_BIDIRECTIONAL,
&attrs);
if (chunk->nmap <= 0) {
for (i = 0; i < chunk->nents; ++i)
put_page(sg_page(&chunk->page_list[i]));
kfree(chunk);
ret = -ENOMEM;
goto out;
}
ret -= chunk->nents;
off += chunk->nents;
list_add_tail(&chunk->list, &umem->chunk_list);
}
ret = 0;
}
out:
if (ret < 0) {
__ib_umem_release(context->device, umem, 0);
kfree(umem);
} else
current->mm->locked_vm = locked;
up_write(&current->mm->mmap_sem);
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
return ret < 0 ? ERR_PTR(ret) : umem;
#else
struct ib_umem *umem;
struct ib_umem_chunk *chunk;
struct proc *proc;
pmap_t pmap;
vm_offset_t end, last, start;
vm_size_t npages;
int error;
int ents;
int ret;
int ents;
int i;
DEFINE_DMA_ATTRS(attrs);
struct scatterlist *sg, *sg_list_start;
int need_release = 0;
error = priv_check(curthread, PRIV_VM_MLOCK);
if (error)
@ -372,134 +303,115 @@ out:
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
if (invalidation_supported || context->peer_mem_private_data) {
struct ib_peer_memory_client *peer_mem_client;
peer_mem_client = ib_get_peer_client(context, addr, size,
&umem->peer_mem_client_context,
&umem->peer_mem_srcu_key);
if (peer_mem_client)
return peer_umem_get(peer_mem_client, umem, addr,
dmasync, invalidation_supported);
}
umem->hugetlb = 0;
INIT_LIST_HEAD(&umem->chunk_list);
pmap = vm_map_pmap(&proc->p_vmspace->vm_map);
ret = 0;
while (npages) {
ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK);
chunk = kmalloc(sizeof(*chunk) +
(sizeof(struct scatterlist) * ents),
GFP_KERNEL);
if (!chunk) {
ret = -ENOMEM;
if (npages == 0) {
ret = -EINVAL;
goto out;
}
chunk->attrs = attrs;
chunk->nents = ents;
sg_init_table(&chunk->page_list[0], ents);
for (i = 0; i < chunk->nents; ++i) {
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
goto out;
need_release = 1;
sg_list_start = umem->sg_head.sgl;
while (npages) {
ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK);
umem->npages += ents;
for_each_sg(sg_list_start, sg, ents, i) {
vm_paddr_t pa;
pa = pmap_extract(pmap, start);
if (pa == 0) {
ret = -ENOMEM;
kfree(chunk);
goto out;
}
sg_set_page(&chunk->page_list[i], PHYS_TO_VM_PAGE(pa),
sg_set_page(sg, PHYS_TO_VM_PAGE(pa),
PAGE_SIZE, 0);
npages--;
start += PAGE_SIZE;
}
chunk->nmap = ib_dma_map_sg_attrs(context->device,
&chunk->page_list[0],
chunk->nents,
/* preparing for next loop */
sg_list_start = sg;
}
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
umem->npages,
DMA_BIDIRECTIONAL,
&attrs);
if (chunk->nmap != chunk->nents) {
kfree(chunk);
if (umem->nmap != umem->npages) {
ret = -ENOMEM;
goto out;
}
list_add_tail(&chunk->list, &umem->chunk_list);
}
out:
if (ret < 0) {
if (need_release)
__ib_umem_release(context->device, umem, 0);
kfree(umem);
}
return ret < 0 ? ERR_PTR(ret) : umem;
#endif
}
EXPORT_SYMBOL(ib_umem_get_ex);
/*
 * ib_umem_get - compatibility entry point for callers that do not
 * support peer invalidation.  Forwards every argument unchanged to
 * ib_umem_get_ex() with invalidation support switched off.
 */
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
			    size_t size, int access, int dmasync)
{
	const int invalidation_supported = 0;

	return ib_umem_get_ex(context, addr, size, access, dmasync,
			      invalidation_supported);
}
EXPORT_SYMBOL(ib_umem_get);
#ifdef __linux__
static void ib_umem_account(struct work_struct *work)
{
struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&umem->mm->mmap_sem);
umem->mm->locked_vm -= umem->diff;
up_write(&umem->mm->mmap_sem);
mmput(umem->mm);
kfree(umem);
}
#endif
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{
#ifdef __linux__
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
unsigned long diff;
__ib_umem_release(umem->context->device, umem, 1);
mm = get_task_mm(current);
if (!mm) {
kfree(umem);
return;
}
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
/*
* We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case
* we defer the vm_locked accounting to the system workqueue.
*/
if (context->closing) {
if (!down_write_trylock(&mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_account);
umem->mm = mm;
umem->diff = diff;
schedule_work(&umem->work);
return;
}
} else
down_write(&mm->mmap_sem);
current->mm->locked_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
#else
vm_offset_t addr, end, last, start;
vm_size_t size;
int error;
if (umem->ib_peer_mem) {
peer_umem_release(umem);
return;
}
__ib_umem_release(umem->context->device, umem, 1);
if (umem->context->closing) {
kfree(umem);
return;
}
error = priv_check(curthread, PRIV_VM_MUNLOCK);
if (error)
return;
addr = umem->start;
size = umem->length;
last = addr + size;
@ -507,69 +419,24 @@ void ib_umem_release(struct ib_umem *umem)
end = roundup2(last, PAGE_SIZE); /* Use PAGE_MASK safe operation. */
vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, end,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#endif
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
int ib_umem_page_count(struct ib_umem *umem)
{
struct ib_umem_chunk *chunk;
int shift;
int i;
int n;
struct scatterlist *sg;
shift = ilog2(umem->page_size);
n = 0;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (i = 0; i < chunk->nmap; ++i)
n += sg_dma_len(&chunk->page_list[i]) >> shift;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
n += sg_dma_len(sg) >> shift;
return n;
}
EXPORT_SYMBOL(ib_umem_page_count);
/**********************************************/
/*
* Stub functions for contiguous pages -
* We currently do not support this feature
*/
/**********************************************/
/**
* ib_cmem_release_contiguous_pages - release memory allocated by
* ib_cmem_alloc_contiguous_pages.
* @cmem: cmem struct to release
*/
void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem)
{
}
EXPORT_SYMBOL(ib_cmem_release_contiguous_pages);
/**
* * ib_cmem_alloc_contiguous_pages - allocate contiguous pages
* * @context: userspace context to allocate memory for
* * @total_size: total required size for that allocation.
* * @page_size_order: order of one contiguous page.
* */
struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
unsigned long total_size,
unsigned long page_size_order)
{
return NULL;
}
EXPORT_SYMBOL(ib_cmem_alloc_contiguous_pages);
/**
* * ib_cmem_map_contiguous_pages_to_vma - map contiguous pages into VMA
* * @ib_cmem: cmem structure returned by ib_cmem_alloc_contiguous_pages
* * @vma: VMA to inject pages into.
* */
int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
struct vm_area_struct *vma)
{
return 0;
}
EXPORT_SYMBOL(ib_cmem_map_contiguous_pages_to_vma);

View File

@ -43,7 +43,9 @@
#include <linux/mutex.h>
#include <linux/kref.h>
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
@ -63,12 +65,9 @@ enum {
};
/*
* Our lifetime rules for these structs are the following: each time a
* device special file is opened, we look up the corresponding struct
* ib_umad_port by minor in the umad_port[] table while holding the
* port_lock. If this lookup succeeds, we take a reference on the
* ib_umad_port's struct ib_umad_device while still holding the
* port_lock; if the lookup fails, we fail the open(). We drop these
* Our lifetime rules for these structs are the following: each time a
* device special file is opened, we take a reference on the
* ib_umad_port's struct ib_umad_device. We drop these
* references in the corresponding close().
*
* In addition to references coming from open character devices, there
@ -76,12 +75,7 @@ enum {
* module's reference taken when allocating the ib_umad_device in
* ib_umad_add_one().
*
* When destroying an ib_umad_device, we clear all of its
* ib_umad_ports from umad_port[] while holding port_lock before
* dropping the module's reference to the ib_umad_device. This is
* always safe because any open() calls will either succeed and obtain
* a reference before we clear the umad_port[] entries, or fail after
* we clear the umad_port[] entries.
* When destroying an ib_umad_device, we drop the module's reference.
*/
struct ib_umad_port {
@ -99,6 +93,7 @@ struct ib_umad_port {
struct ib_umad_device *umad_dev;
int dev_num;
u8 port_num;
struct list_head port_lst;
};
struct ib_umad_device {
@ -135,18 +130,85 @@ static struct class *umad_class;
static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
static DEFINE_SPINLOCK(port_lock);
static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device);
static void ib_umad_release_dev(struct kref *ref)
static DEFINE_SPINLOCK(ports_list_lock);
static struct list_head ports_list;
static void remove_ports(struct kref *ref)
{
int i;
struct ib_umad_port *p, *p1;
struct ib_umad_device *dev =
container_of(ref, struct ib_umad_device, ref);
for (i = 0; i <= dev->end_port - dev->start_port; ++i) {
struct ib_umad_port *port = &dev->port[i];
list_for_each_entry_safe(p, p1, &ports_list, port_lst)
if (p == port) {
list_del(&p->port_lst);
break;
}
}
}
static void put_umad_dev(struct kref *ref)
{
int ret, i;
struct ib_umad_device *dev =
container_of(ref, struct ib_umad_device, ref);
spin_lock(&ports_list_lock);
ret = (kref_put(ref, remove_ports));
spin_unlock(&ports_list_lock);
if (ret) {
for (i = 0; i <= dev->end_port - dev->start_port; ++i) {
if (dev->port[i].dev_num < IB_UMAD_MAX_PORTS)
clear_bit(dev->port[i].dev_num, dev_map);
else
clear_bit(dev->port[i].dev_num - IB_UMAD_MAX_PORTS, overflow_map);
cdev_del(dev->port[i].cdev);
cdev_del(dev->port[i].sm_cdev);
}
kfree(dev);
}
}
static void release_port(struct ib_umad_port *port)
{
put_umad_dev(&port->umad_dev->ref);
}
/*
 * Find the port whose umad or issm character device is @cdev.
 *
 * On a match, a reference is taken on the port's umad device before
 * ports_list_lock is dropped.  Returns the matching port, or NULL if no
 * port on ports_list uses @cdev.
 */
static struct ib_umad_port *get_port(struct cdev *cdev)
{
	struct ib_umad_port *cur;
	struct ib_umad_port *match = NULL;

	spin_lock(&ports_list_lock);
	list_for_each_entry(cur, &ports_list, port_lst) {
		if (cur->cdev != cdev && cur->sm_cdev != cdev)
			continue;

		kref_get(&cur->umad_dev->ref);
		match = cur;
		break;
	}
	spin_unlock(&ports_list_lock);

	return match;
}
/* Link @port at the head of ports_list, serialised by ports_list_lock. */
static void insert_port(struct ib_umad_port *port)
{
	struct list_head *node = &port->port_lst;

	spin_lock(&ports_list_lock);
	list_add(node, &ports_list);
	spin_unlock(&ports_list_lock);
}
static int hdr_size(struct ib_umad_file *file)
@ -466,8 +528,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err;
}
if (packet->mad.hdr.id < 0 ||
packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
ret = -EINVAL;
goto err;
}
@ -679,7 +740,7 @@ found:
file->already_used = 1;
if (!file->use_pkey_index) {
printk(KERN_WARNING "user_mad: process %s did not enable "
"P_Key index support.\n", curproc->p_comm);
"P_Key index support.\n", curthread->td_proc->p_comm);
printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt "
"has info on the new ABI.\n");
}
@ -711,7 +772,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
@ -779,41 +840,33 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
/*
* ib_umad_open() does not need the BKL:
*
* - umad_port[] accesses are protected by port_lock, the
* ib_umad_port structures are properly reference counted, and
* - the ib_umad_port structures are properly reference counted, and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - the ioctl method does not affect any global state outside of the
* file structure being operated on;
* - the port is added to umad_port[] as the last part of module
* initialization so the open method will either immediately run
* -ENXIO, or all required initialization will be done.
*/
static int ib_umad_open(struct inode *inode, struct file *filp)
{
struct ib_umad_port *port;
struct ib_umad_file *file;
int ret = 0;
spin_lock(&port_lock);
port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
if (port)
kref_get(&port->umad_dev->ref);
spin_unlock(&port_lock);
int ret;
port = get_port(inode->i_cdev->si_drv1);
if (!port)
return -ENXIO;
mutex_lock(&port->file_mutex);
if (!port->ib_dev) {
release_port(port);
ret = -ENXIO;
goto out;
}
file = kzalloc(sizeof *file, GFP_KERNEL);
if (!file) {
kref_put(&port->umad_dev->ref, ib_umad_release_dev);
release_port(port);
ret = -ENOMEM;
goto out;
}
@ -830,6 +883,8 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
ret = nonseekable_open(inode, filp);
out:
mutex_unlock(&port->file_mutex);
return ret;
@ -838,7 +893,7 @@ out:
static int ib_umad_close(struct inode *inode, struct file *filp)
{
struct ib_umad_file *file = filp->private_data;
struct ib_umad_device *dev = file->port->umad_dev;
struct ib_umad_port *port = file->port;
struct ib_umad_packet *packet, *tmp;
int already_dead;
int i;
@ -867,7 +922,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
kref_put(&dev->ref, ib_umad_release_dev);
release_port(port);
return 0;
}
@ -882,7 +937,8 @@ static const struct file_operations umad_fops = {
.compat_ioctl = ib_umad_compat_ioctl,
#endif
.open = ib_umad_open,
.release = ib_umad_close
.release = ib_umad_close,
.llseek = no_llseek,
};
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@ -893,12 +949,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
};
int ret;
spin_lock(&port_lock);
port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
if (port)
kref_get(&port->umad_dev->ref);
spin_unlock(&port_lock);
port = get_port(inode->i_cdev->si_drv1);
if (!port)
return -ENXIO;
@ -922,10 +973,10 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
filp->private_data = port;
return 0;
return nonseekable_open(inode, filp);
fail:
kref_put(&port->umad_dev->ref, ib_umad_release_dev);
release_port(port);
return ret;
}
@ -944,7 +995,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
kref_put(&port->umad_dev->ref, ib_umad_release_dev);
release_port(port);
return ret;
}
@ -952,7 +1003,8 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
static const struct file_operations umad_sm_fops = {
.owner = THIS_MODULE,
.open = ib_umad_sm_open,
.release = ib_umad_sm_close
.release = ib_umad_sm_close,
.llseek = no_llseek,
};
static struct ib_client umad_client = {
@ -991,31 +1043,66 @@ static ssize_t show_abi_version(struct class *class, struct class_attribute *att
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
static dev_t overflow_maj;
/*
 * Pick a free slot in the overflow device-number range.
 *
 * The dynamic chrdev region backing the overflow range is registered on
 * first use (while overflow_maj is still zero).  Returns the index of the
 * first clear bit in overflow_map, the error from alloc_chrdev_region() if
 * registration fails, or -1 when every overflow slot is taken.  The bit is
 * not set here; that is left to the caller.
 */
static int find_overflow_devnum(void)
{
	int slot;

	if (overflow_maj == 0) {
		int err = alloc_chrdev_region(&overflow_maj, 0,
					      IB_UMAD_MAX_PORTS * 2,
					      "infiniband_mad");

		if (err != 0) {
			printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
			return err;
		}
	}

	slot = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);

	return (slot < IB_UMAD_MAX_PORTS) ? slot : -1;
}
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_port *port)
{
int devnum;
dev_t base;
spin_lock(&port_lock);
port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
if (port->dev_num >= IB_UMAD_MAX_PORTS) {
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
devnum = find_overflow_devnum();
if (devnum < 0)
return -1;
spin_lock(&port_lock);
port->dev_num = devnum + IB_UMAD_MAX_PORTS;
base = devnum + overflow_maj;
set_bit(devnum, overflow_map);
} else {
port->dev_num = devnum;
base = devnum + base_dev;
set_bit(devnum, dev_map);
}
set_bit(port->dev_num, dev_map);
spin_unlock(&port_lock);
port->ib_dev = device;
port->port_num = port_num;
init_MUTEX(&port->sm_sem);
sema_init(&port->sm_sem, 1);
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
port->cdev = cdev_alloc();
if (!port->cdev)
return -1;
port->cdev->owner = THIS_MODULE;
goto err_cdev_c;
port->cdev->ops = &umad_fops;
port->cdev->owner = THIS_MODULE;
kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
if (cdev_add(port->cdev, base_dev + port->dev_num, 1))
if (cdev_add(port->cdev, base, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dma_device,
@ -1029,13 +1116,15 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->dev, &dev_attr_port))
goto err_dev;
base += IB_UMAD_MAX_PORTS;
port->sm_cdev = cdev_alloc();
if (!port->sm_cdev)
goto err_dev;
port->sm_cdev->owner = THIS_MODULE;
port->sm_cdev->ops = &umad_sm_fops;
port->sm_cdev->owner = THIS_MODULE;
kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
if (cdev_add(port->sm_cdev, base, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dma_device,
@ -1049,10 +1138,6 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (device_create_file(port->sm_dev, &dev_attr_port))
goto err_sm_dev;
spin_lock(&port_lock);
umad_port[port->dev_num] = port;
spin_unlock(&port_lock);
return 0;
err_sm_dev:
@ -1066,7 +1151,11 @@ err_dev:
err_cdev:
cdev_del(port->cdev);
clear_bit(port->dev_num, dev_map);
err_cdev_c:
if (port->dev_num < IB_UMAD_MAX_PORTS)
clear_bit(devnum, dev_map);
else
clear_bit(devnum, overflow_map);
return -1;
}
@ -1074,7 +1163,6 @@ err_cdev:
static void ib_umad_kill_port(struct ib_umad_port *port)
{
struct ib_umad_file *file;
int already_dead;
int id;
dev_set_drvdata(port->dev, NULL);
@ -1083,20 +1171,12 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
device_destroy(umad_class, port->cdev->dev);
device_destroy(umad_class, port->sm_cdev->dev);
cdev_del(port->cdev);
cdev_del(port->sm_cdev);
spin_lock(&port_lock);
umad_port[port->dev_num] = NULL;
spin_unlock(&port_lock);
mutex_lock(&port->file_mutex);
port->ib_dev = NULL;
list_for_each_entry(file, &port->file_list, port_list) {
mutex_lock(&file->mutex);
already_dead = file->agents_dead;
file->agents_dead = 1;
mutex_unlock(&file->mutex);
@ -1106,8 +1186,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
clear_bit(port->dev_num, dev_map);
}
static void ib_umad_add_one(struct ib_device *device)
@ -1136,10 +1214,12 @@ static void ib_umad_add_one(struct ib_device *device)
umad_dev->start_port = s;
umad_dev->end_port = e;
for (i = 0; i <= e - s; ++i)
insert_port(&umad_dev->port[i]);
for (i = s; i <= e; ++i) {
umad_dev->port[i - s].umad_dev = umad_dev;
if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND)
if (ib_umad_init_port(device, i, &umad_dev->port[i - s]))
goto err;
}
@ -1150,10 +1230,9 @@ static void ib_umad_add_one(struct ib_device *device)
err:
while (--i >= s)
if (rdma_port_get_link_layer(device, i) == IB_LINK_LAYER_INFINIBAND)
ib_umad_kill_port(&umad_dev->port[i - s]);
kref_put(&umad_dev->ref, ib_umad_release_dev);
put_umad_dev(&umad_dev->ref);
}
static void ib_umad_remove_one(struct ib_device *device)
@ -1165,16 +1244,22 @@ static void ib_umad_remove_one(struct ib_device *device)
return;
for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND)
ib_umad_kill_port(&umad_dev->port[i]);
kref_put(&umad_dev->ref, ib_umad_release_dev);
put_umad_dev(&umad_dev->ref);
}
/*
 * devnode callback for umad_class: builds the node name
 * "infiniband/<device name>" in a kasprintf() allocation.  @mode is not
 * used.
 */
static char *umad_devnode(struct device *dev, umode_t *mode)
{
	const char *name = dev_name(dev);

	return kasprintf(GFP_KERNEL, "infiniband/%s", name);
}
static int __init ib_umad_init(void)
{
int ret;
INIT_LIST_HEAD(&ports_list);
ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
"infiniband_mad");
if (ret) {
@ -1189,6 +1274,8 @@ static int __init ib_umad_init(void)
goto out_chrdev;
}
umad_class->devnode = umad_devnode;
ret = class_create_file(umad_class, &class_attr_abi_version);
if (ret) {
printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
@ -1218,6 +1305,8 @@ static void __exit ib_umad_cleanup(void)
ib_unregister_client(&umad_client);
class_destroy(umad_class);
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
if (overflow_maj)
unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
}
module_init(ib_umad_init);

View File

@ -41,10 +41,14 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <linux/cdev.h>
#include <linux/rbtree.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_verbs_exp.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_user_verbs_exp.h>
/*
* Our lifetime rules for these structs are the following:
@ -69,24 +73,26 @@
struct ib_uverbs_device {
struct kref ref;
int num_comp_vectors;
struct completion comp;
int devnum;
struct cdev *cdev;
struct device *dev;
struct ib_device *ib_dev;
int num_comp_vectors;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
};
struct ib_uverbs_event_file {
struct kref ref;
struct file *filp;
int is_async;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
int is_async;
int is_closed;
};
struct ib_uverbs_file {
@ -120,9 +126,20 @@ struct ib_uevent_object {
u32 events_reported;
};
/* User object for an XRC domain: the generic uobject plus an atomic counter. */
struct ib_uxrcd_object {
struct ib_uobject uobject;
/* NOTE(review): presumably counts SRQs/QPs still using this XRCD -- confirm in uverbs_cmd.c */
atomic_t refcnt;
};
/* User object for an SRQ: an event-carrying uobject plus a link to an XRCD object. */
struct ib_usrq_object {
struct ib_uevent_object uevent;
/* NOTE(review): presumably only meaningful for XRC SRQs -- confirm at the users */
struct ib_uxrcd_object *uxrcd;
};
/*
 * User object for a QP: an event-carrying uobject, a list head for
 * multicast entries, and a link to an XRCD object.
 */
struct ib_uqp_object {
struct ib_uevent_object uevent;
struct list_head mcast_list;
/* NOTE(review): presumably only meaningful for XRC target QPs -- confirm at the users */
struct ib_uxrcd_object *uxrcd;
};
struct ib_ucq_object {
@ -134,9 +151,8 @@ struct ib_ucq_object {
u32 async_events_reported;
};
struct ib_uxrcd_object {
struct ib_udct_object {
struct ib_uobject uobject;
struct list_head xrc_reg_qp_list;
};
extern spinlock_t ib_uverbs_idr_lock;
@ -147,12 +163,14 @@ extern struct idr ib_uverbs_ah_idr;
extern struct idr ib_uverbs_cq_idr;
extern struct idr ib_uverbs_qp_idr;
extern struct idr ib_uverbs_srq_idr;
extern struct idr ib_uverbs_xrc_domain_idr;
extern struct idr ib_uverbs_xrcd_idr;
extern struct idr ib_uverbs_rule_idr;
extern struct idr ib_uverbs_dct_idr;
void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
int is_async, int *fd);
int is_async);
struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
@ -167,12 +185,24 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
void *context_ptr);
void ib_uverbs_dealloc_xrcd(struct ib_device *ib_dev,
struct ib_xrcd *xrcd);
int ib_uverbs_cleanup_xrc_rcv_qp(struct ib_uverbs_file *file,
struct ib_xrcd *xrcd, u32 qp_num);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
/*
 * One flow specification, stored as a union of the per-protocol layouts
 * (eth, ib, ipv4, tcp_udp).  The first union member is itself a union that
 * lets the leading header be read either as hdr or directly through the
 * anonymous type/size/reserved fields; both views share the same storage.
 * NOTE(review): this relies on every per-protocol struct starting with the
 * same header layout -- confirm in ib_user_verbs.h.
 */
struct ib_uverbs_flow_spec {
union {
union {
struct ib_uverbs_flow_spec_hdr hdr;
struct {
__u32 type;
__u16 size;
__u16 reserved;
};
};
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ib ib;
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
};
};
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
@ -186,6 +216,8 @@ IB_UVERBS_DECLARE_CMD(alloc_pd);
IB_UVERBS_DECLARE_CMD(dealloc_pd);
IB_UVERBS_DECLARE_CMD(reg_mr);
IB_UVERBS_DECLARE_CMD(dereg_mr);
IB_UVERBS_DECLARE_CMD(alloc_mw);
IB_UVERBS_DECLARE_CMD(dealloc_mw);
IB_UVERBS_DECLARE_CMD(create_comp_channel);
IB_UVERBS_DECLARE_CMD(create_cq);
IB_UVERBS_DECLARE_CMD(resize_cq);
@ -193,6 +225,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq);
IB_UVERBS_DECLARE_CMD(req_notify_cq);
IB_UVERBS_DECLARE_CMD(destroy_cq);
IB_UVERBS_DECLARE_CMD(create_qp);
IB_UVERBS_DECLARE_CMD(open_qp);
IB_UVERBS_DECLARE_CMD(query_qp);
IB_UVERBS_DECLARE_CMD(modify_qp);
IB_UVERBS_DECLARE_CMD(destroy_qp);
@ -207,14 +240,30 @@ IB_UVERBS_DECLARE_CMD(create_srq);
IB_UVERBS_DECLARE_CMD(modify_srq);
IB_UVERBS_DECLARE_CMD(query_srq);
IB_UVERBS_DECLARE_CMD(destroy_srq);
IB_UVERBS_DECLARE_CMD(create_xrc_srq);
IB_UVERBS_DECLARE_CMD(open_xrc_domain);
IB_UVERBS_DECLARE_CMD(close_xrc_domain);
IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp);
IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp);
IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp);
IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp);
IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp);
IB_UVERBS_DECLARE_CMD(create_xsrq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
/*
 * Declares the prototype of ib_uverbs_ex_<name>(), which returns int and
 * takes the uverbs file plus two ib_udata pointers (ucore and uhw).
 */
#define IB_UVERBS_DECLARE_EX_CMD(name) \
int ib_uverbs_ex_##name(struct ib_uverbs_file *file,\
struct ib_udata *ucore, \
struct ib_udata *uhw)
/*
 * Declares the prototype of ib_uverbs_exp_<name>().  Same parameters as
 * IB_UVERBS_DECLARE_EX_CMD, but the declared function returns ssize_t
 * rather than int.
 */
#define IB_UVERBS_DECLARE_EXP_CMD(name) \
ssize_t ib_uverbs_exp_##name(struct ib_uverbs_file *file, \
struct ib_udata *ucore, \
struct ib_udata *uhw)
IB_UVERBS_DECLARE_EX_CMD(create_flow);
IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
IB_UVERBS_DECLARE_EXP_CMD(create_qp);
IB_UVERBS_DECLARE_EXP_CMD(modify_cq);
IB_UVERBS_DECLARE_EXP_CMD(modify_qp);
IB_UVERBS_DECLARE_EXP_CMD(create_cq);
IB_UVERBS_DECLARE_EXP_CMD(query_device);
IB_UVERBS_DECLARE_EXP_CMD(create_dct);
IB_UVERBS_DECLARE_EXP_CMD(destroy_dct);
IB_UVERBS_DECLARE_EXP_CMD(query_dct);
#endif /* UVERBS_H */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -30,6 +30,7 @@
* SOFTWARE.
*/
#include <linux/module.h>
#include <rdma/ib_marshall.h>
void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
@ -40,18 +41,21 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
dst->grh.sgid_index = src->grh.sgid_index;
dst->grh.hop_limit = src->grh.hop_limit;
dst->grh.traffic_class = src->grh.traffic_class;
memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
dst->dlid = src->dlid;
dst->sl = src->sl;
dst->src_path_bits = src->src_path_bits;
dst->static_rate = src->static_rate;
dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
dst->port_num = src->port_num;
dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
dst->qp_state = src->qp_state;
dst->cur_qp_state = src->cur_qp_state;
dst->path_mtu = src->path_mtu;
dst->path_mig_state = src->path_mig_state;
@ -83,6 +87,7 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
dst->rnr_retry = src->rnr_retry;
dst->alt_port_num = src->alt_port_num;
dst->alt_timeout = src->alt_timeout;
memset(dst->reserved, 0, sizeof(dst->reserved));
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);

View File

@ -38,10 +38,13 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
int ib_rate_to_mult(enum ib_rate rate)
{
@ -77,6 +80,31 @@ enum ib_rate mult_to_ib_rate(int mult)
}
EXPORT_SYMBOL(mult_to_ib_rate);
/*
 * Translate an IB rate enumerator into megabits per second.
 *
 * Some enumerators map to non-round values (for example IB_RATE_14_GBPS
 * yields 14062 and IB_RATE_100_GBPS yields 103125).  Any enumerator not
 * listed below yields -1.
 */
int ib_rate_to_mbps(enum ib_rate rate)
{
	int mbps;

	switch (rate) {
	case IB_RATE_2_5_GBPS:
		mbps = 2500;
		break;
	case IB_RATE_5_GBPS:
		mbps = 5000;
		break;
	case IB_RATE_10_GBPS:
		mbps = 10000;
		break;
	case IB_RATE_20_GBPS:
		mbps = 20000;
		break;
	case IB_RATE_30_GBPS:
		mbps = 30000;
		break;
	case IB_RATE_40_GBPS:
		mbps = 40000;
		break;
	case IB_RATE_60_GBPS:
		mbps = 60000;
		break;
	case IB_RATE_80_GBPS:
		mbps = 80000;
		break;
	case IB_RATE_120_GBPS:
		mbps = 120000;
		break;
	case IB_RATE_14_GBPS:
		mbps = 14062;
		break;
	case IB_RATE_56_GBPS:
		mbps = 56250;
		break;
	case IB_RATE_112_GBPS:
		mbps = 112500;
		break;
	case IB_RATE_168_GBPS:
		mbps = 168750;
		break;
	case IB_RATE_25_GBPS:
		mbps = 25781;
		break;
	case IB_RATE_100_GBPS:
		mbps = 103125;
		break;
	case IB_RATE_200_GBPS:
		mbps = 206250;
		break;
	case IB_RATE_300_GBPS:
		mbps = 309375;
		break;
	default:
		mbps = -1;
		break;
	}

	return mbps;
}
EXPORT_SYMBOL(ib_rate_to_mbps);
enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)
{
@ -87,6 +115,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
return RDMA_TRANSPORT_IB;
case RDMA_NODE_RNIC:
return RDMA_TRANSPORT_IWARP;
case RDMA_NODE_MIC:
return RDMA_TRANSPORT_SCIF;
default:
BUG();
return 0;
@ -104,6 +134,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
return IB_LINK_LAYER_INFINIBAND;
case RDMA_TRANSPORT_IWARP:
return IB_LINK_LAYER_ETHERNET;
case RDMA_TRANSPORT_SCIF:
return IB_LINK_LAYER_SCIF;
default:
return IB_LINK_LAYER_UNSPECIFIED;
}
@ -162,8 +194,29 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
u32 flow_class;
u16 gid_index;
int ret;
int is_eth = (rdma_port_get_link_layer(device, port_num) ==
IB_LINK_LAYER_ETHERNET);
memset(ah_attr, 0, sizeof *ah_attr);
if (is_eth) {
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
if (wc->wc_flags & IB_WC_WITH_SMAC &&
wc->wc_flags & IB_WC_WITH_VLAN) {
memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
ah_attr->vlan_id = wc->vlan_id;
} else {
ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
ah_attr->dmac, &ah_attr->vlan_id);
if (ret)
return ret;
}
} else {
ah_attr->vlan_id = 0xffff;
}
ah_attr->dlid = wc->slid;
ah_attr->sl = wc->sl;
ah_attr->src_path_bits = wc->dlid_path_bits;
@ -250,8 +303,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->ext.xrc.cq = NULL;
srq->ext.xrc.xrcd = NULL;
srq->srq_type = srq_init_attr->srq_type;
if (srq->srq_type == IB_SRQT_XRC) {
srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
atomic_inc(&srq->ext.xrc.xrcd->usecnt);
atomic_inc(&srq->ext.xrc.cq->usecnt);
}
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
}
@ -260,36 +318,6 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
}
EXPORT_SYMBOL(ib_create_srq);
struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
struct ib_cq *xrc_cq,
struct ib_xrcd *xrcd,
struct ib_srq_init_attr *srq_init_attr)
{
struct ib_srq *srq;
if (!pd->device->create_xrc_srq)
return ERR_PTR(-ENOSYS);
srq = pd->device->create_xrc_srq(pd, xrc_cq, xrcd, srq_init_attr, NULL);
if (!IS_ERR(srq)) {
srq->device = pd->device;
srq->pd = pd;
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->ext.xrc.cq = xrc_cq;
srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&xrcd->usecnt);
atomic_inc(&xrc_cq->usecnt);
atomic_set(&srq->usecnt, 0);
}
return srq;
}
EXPORT_SYMBOL(ib_create_xrc_srq);
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask)
@ -308,27 +336,39 @@ int ib_query_srq(struct ib_srq *srq,
}
EXPORT_SYMBOL(ib_query_srq);
/*
 * Forward a query_values request to the device driver.
 *
 * Returns whatever the driver's query_values method returns, or -ENOSYS
 * when the device does not provide that method.
 */
int ib_query_values(struct ib_device *device,
		    int q_values, struct ib_device_values *values)
{
	if (!device->query_values)
		return -ENOSYS;

	return device->query_values(device, q_values, values);
}
EXPORT_SYMBOL(ib_query_values);
int ib_destroy_srq(struct ib_srq *srq)
{
struct ib_pd *pd;
struct ib_cq *xrc_cq;
struct ib_xrcd *xrcd;
enum ib_srq_type srq_type;
struct ib_xrcd *uninitialized_var(xrcd);
struct ib_cq *uninitialized_var(cq);
int ret;
if (atomic_read(&srq->usecnt))
return -EBUSY;
pd = srq->pd;
xrc_cq = srq->ext.xrc.cq;
srq_type = srq->srq_type;
if (srq_type == IB_SRQT_XRC) {
xrcd = srq->ext.xrc.xrcd;
cq = srq->ext.xrc.cq;
}
ret = srq->device->destroy_srq(srq);
if (!ret) {
atomic_dec(&pd->usecnt);
if (xrc_cq)
atomic_dec(&xrc_cq->usecnt);
if (xrcd)
if (srq_type == IB_SRQT_XRC) {
atomic_dec(&xrcd->usecnt);
atomic_dec(&cq->usecnt);
}
}
return ret;
@ -337,32 +377,130 @@ EXPORT_SYMBOL(ib_destroy_srq);
/* Queue pairs */
/*
 * Event handler for a shared QP; @context is the QP whose open_list is
 * walked.  Every entry on that list that has an event_handler gets the
 * event delivered together with its own qp_context.
 *
 * event->element.qp is used as the list cursor, so each handler sees the
 * entry it belongs to in event->element.qp; the field is not restored
 * after the walk.  The walk runs under the device's event_handler_lock with
 * interrupts saved/disabled.
 */
static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
struct ib_qp *qp = context;
unsigned long flags;
/* The code below must be synced with deletions of existing qps (ib_close_qp) --
* because a qp from the list may be closed during the scan, resulting in a kernel Oops.
*/
spin_lock_irqsave(&qp->device->event_handler_lock, flags);
list_for_each_entry(event->element.qp, &qp->open_list, open_list)
if (event->element.qp->event_handler)
event->element.qp->event_handler(event, event->element.qp->qp_context);
spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
}
/* Add @qp to the head of @xrcd's target-QP list under tgt_qp_mutex. */
static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
{
	struct list_head *tgt_list = &xrcd->tgt_qp_list;

	mutex_lock(&xrcd->tgt_qp_mutex);
	list_add(&qp->xrcd_list, tgt_list);
	mutex_unlock(&xrcd->tgt_qp_mutex);
}
static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
void (*event_handler)(struct ib_event *, void *),
void *qp_context)
{
struct ib_qp *qp;
unsigned long flags;
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->real_qp = real_qp;
atomic_inc(&real_qp->usecnt);
qp->device = real_qp->device;
qp->event_handler = event_handler;
qp->qp_context = qp_context;
qp->qp_num = real_qp->qp_num;
qp->qp_type = real_qp->qp_type;
spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
list_add(&qp->open_list, &real_qp->open_list);
spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
return qp;
}
/*
 * Open a handle onto an existing XRC target QP of @xrcd.
 *
 * Only qp_type IB_QPT_XRC_TGT is accepted.  The XRCD's target-QP list is
 * searched under tgt_qp_mutex for a QP whose number equals
 * qp_open_attr->qp_num; the first match is opened via __ib_open_qp() with
 * the handler and context from @qp_open_attr.
 *
 * Returns the new handle, ERR_PTR(-EINVAL) for a wrong QP type or when no
 * QP matches, or the error pointer produced by __ib_open_qp().
 */
struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
			 struct ib_qp_open_attr *qp_open_attr)
{
	struct ib_qp *opened = ERR_PTR(-EINVAL);
	struct ib_qp *cur;

	if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
		return opened;

	mutex_lock(&xrcd->tgt_qp_mutex);
	list_for_each_entry(cur, &xrcd->tgt_qp_list, xrcd_list) {
		if (cur->qp_num != qp_open_attr->qp_num)
			continue;

		opened = __ib_open_qp(cur, qp_open_attr->event_handler,
				      qp_open_attr->qp_context);
		break;
	}
	mutex_unlock(&xrcd->tgt_qp_mutex);

	return opened;
}
EXPORT_SYMBOL(ib_open_qp);
struct ib_qp *ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
struct ib_qp *qp;
struct ib_qp *qp, *real_qp;
struct ib_device *device;
qp = pd->device->create_qp(pd, qp_init_attr, NULL);
device = pd ? pd->device : qp_init_attr->xrcd->device;
qp = device->create_qp(pd, qp_init_attr, NULL);
if (!IS_ERR(qp)) {
qp->device = pd->device;
qp->pd = pd;
qp->send_cq = qp_init_attr->send_cq;
qp->recv_cq = qp_init_attr->recv_cq;
qp->srq = qp_init_attr->srq;
qp->device = device;
qp->real_qp = qp;
qp->uobject = NULL;
qp->qp_type = qp_init_attr->qp_type;
atomic_set(&qp->usecnt, 0);
if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
qp->event_handler = __ib_shared_qp_event_handler;
qp->qp_context = qp;
qp->pd = NULL;
qp->send_cq = qp->recv_cq = NULL;
qp->srq = NULL;
qp->xrcd = qp_init_attr->xrcd;
atomic_inc(&qp_init_attr->xrcd->usecnt);
INIT_LIST_HEAD(&qp->open_list);
real_qp = qp;
qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
qp_init_attr->qp_context);
if (!IS_ERR(qp))
__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
else
real_qp->device->destroy_qp(real_qp);
} else {
qp->event_handler = qp_init_attr->event_handler;
qp->qp_context = qp_init_attr->qp_context;
qp->qp_type = qp_init_attr->qp_type;
qp->xrcd = qp->qp_type == IB_QPT_XRC ?
qp_init_attr->xrcd : NULL;
atomic_inc(&pd->usecnt);
atomic_inc(&qp_init_attr->send_cq->usecnt);
if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
qp->recv_cq = NULL;
qp->srq = NULL;
} else {
qp->recv_cq = qp_init_attr->recv_cq;
atomic_inc(&qp_init_attr->recv_cq->usecnt);
if (qp_init_attr->srq)
qp->srq = qp_init_attr->srq;
if (qp->srq)
atomic_inc(&qp_init_attr->srq->usecnt);
if (qp->qp_type == IB_QPT_XRC)
atomic_inc(&qp->xrcd->usecnt);
}
qp->pd = pd;
qp->send_cq = qp_init_attr->send_cq;
qp->xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_inc(&qp_init_attr->send_cq->usecnt);
}
}
return qp;
@ -371,8 +509,10 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_RAW_PACKET + 1];
enum ib_qp_attr_mask opt_param[IB_QPT_RAW_PACKET + 1];
enum ib_qp_attr_mask req_param[IB_QPT_MAX];
enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param[IB_QPT_MAX];
enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@ -389,13 +529,24 @@ static const struct {
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_XRC] = (IB_QP_PKEY_INDEX |
[IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS |
IB_QP_DC_KEY),
[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
},
.opt_param = {
[IB_QPT_UD] = IB_QP_GROUP_RSS,
[IB_QPT_RAW_PACKET] = IB_QP_GROUP_RSS
}
},
},
@ -414,7 +565,13 @@ static const struct {
[IB_QPT_RC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_XRC] = (IB_QP_PKEY_INDEX |
[IB_QPT_DC_INI] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
@ -436,13 +593,26 @@ static const struct {
IB_QP_RQ_PSN |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
[IB_QPT_XRC] = (IB_QP_AV |
[IB_QPT_DC_INI] = (IB_QP_PATH_MTU |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
[IB_QPT_XRC_INI] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
IB_QP_RQ_PSN),
[IB_QPT_XRC_TGT] = (IB_QP_AV |
IB_QP_PATH_MTU |
IB_QP_DEST_QPN |
IB_QP_RQ_PSN |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_MIN_RNR_TIMER),
},
.req_param_add_eth = {
[IB_QPT_RC] = (IB_QP_SMAC),
[IB_QPT_UC] = (IB_QP_SMAC),
[IB_QPT_XRC_INI] = (IB_QP_SMAC),
[IB_QPT_XRC_TGT] = (IB_QP_SMAC)
},
.opt_param = {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
@ -452,13 +622,34 @@ static const struct {
[IB_QPT_RC] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_XRC] = (IB_QP_ALT_PATH |
[IB_QPT_DC_INI] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_XRC_INI] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX),
[IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
IB_QP_QKEY),
[IB_QPT_RAW_PACKET] = IB_QP_AV,
},
.opt_param_add_eth = {
[IB_QPT_RC] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID),
[IB_QPT_UC] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID),
[IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID),
[IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC |
IB_QP_VID |
IB_QP_ALT_VID)
}
}
},
@ -475,11 +666,17 @@ static const struct {
IB_QP_RNR_RETRY |
IB_QP_SQ_PSN |
IB_QP_MAX_QP_RD_ATOMIC),
[IB_QPT_XRC] = (IB_QP_TIMEOUT |
[IB_QPT_DC_INI] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC),
[IB_QPT_XRC_INI] = (IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_SQ_PSN |
IB_QP_MAX_QP_RD_ATOMIC),
[IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT |
IB_QP_SQ_PSN),
[IB_QPT_SMI] = IB_QP_SQ_PSN,
[IB_QPT_GSI] = IB_QP_SQ_PSN,
},
@ -495,7 +692,16 @@ static const struct {
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC] = (IB_QP_CUR_STATE |
[IB_QPT_DC_INI] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
@ -524,7 +730,16 @@ static const struct {
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
IB_QP_MIN_RNR_TIMER),
[IB_QPT_XRC] = (IB_QP_CUR_STATE |
[IB_QPT_DC_INI] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
IB_QP_MIN_RNR_TIMER),
[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
IB_QP_ACCESS_FLAGS |
IB_QP_ALT_PATH |
IB_QP_PATH_MIG_STATE |
@ -541,7 +756,8 @@ static const struct {
[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_XRC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
}
@ -564,7 +780,11 @@ static const struct {
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC] = (IB_QP_CUR_STATE |
[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_MIN_RNR_TIMER |
@ -597,12 +817,19 @@ static const struct {
IB_QP_PKEY_INDEX |
IB_QP_MIN_RNR_TIMER |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC] = (IB_QP_PORT |
[IB_QPT_XRC_INI] = (IB_QP_PORT |
IB_QP_AV |
IB_QP_TIMEOUT |
IB_QP_RETRY_CNT |
IB_QP_RNR_RETRY |
IB_QP_MAX_QP_RD_ATOMIC |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX |
IB_QP_PATH_MIG_STATE),
[IB_QPT_XRC_TGT] = (IB_QP_PORT |
IB_QP_AV |
IB_QP_TIMEOUT |
IB_QP_MAX_DEST_RD_ATOMIC |
IB_QP_ALT_PATH |
IB_QP_ACCESS_FLAGS |
@ -640,7 +867,8 @@ static const struct {
};
int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask)
enum ib_qp_type type, enum ib_qp_attr_mask mask,
enum rdma_link_layer ll)
{
enum ib_qp_attr_mask req_param, opt_param;
@ -659,6 +887,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
if (ll == IB_LINK_LAYER_ETHERNET) {
req_param |= qp_state_table[cur_state][next_state].
req_param_add_eth[type];
opt_param |= qp_state_table[cur_state][next_state].
opt_param_add_eth[type];
}
if ((mask & req_param) != req_param)
return 0;
@ -673,7 +908,13 @@ int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
int ret;
ret = qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
if (!ret && (qp_attr_mask & IB_QP_PORT))
qp->port_num = qp_attr->port_num;
return ret;
}
EXPORT_SYMBOL(ib_modify_qp);
@ -683,35 +924,87 @@ int ib_query_qp(struct ib_qp *qp,
struct ib_qp_init_attr *qp_init_attr)
{
return qp->device->query_qp ?
qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
-ENOSYS;
}
EXPORT_SYMBOL(ib_query_qp);
/*
 * Close a QP handle that refers to a separate "real" QP.
 *
 * A QP that is its own real_qp cannot be closed this way and yields -EINVAL.
 * Otherwise the handle is unlinked from its open_list under the real QP's
 * device event_handler_lock, one reference on the real QP is dropped, the
 * handle is freed and 0 is returned.
 */
int ib_close_qp(struct ib_qp *qp)
{
	struct ib_qp *owner = qp->real_qp;
	unsigned long irq_state;

	if (owner == qp)
		return -EINVAL;

	/* Unlink the handle while holding the event handler lock. */
	spin_lock_irqsave(&owner->device->event_handler_lock, irq_state);
	list_del(&qp->open_list);
	spin_unlock_irqrestore(&owner->device->event_handler_lock, irq_state);

	atomic_dec(&owner->usecnt);
	kfree(qp);

	return 0;
}
EXPORT_SYMBOL(ib_close_qp);
/*
 * Destroy a QP handle whose real_qp is a different (shared) QP.
 *
 * The handle is closed under the XRC domain's tgt_qp_mutex. If that leaves
 * the real QP with a zero usecnt, the real QP is taken off its xrcd_list and
 * destroyed after the mutex has been released.
 *
 * Always returns 0: neither the ib_close_qp() result nor a failure of
 * ib_destroy_qp() on the real QP is reported to the caller.
 */
static int __ib_destroy_shared_qp(struct ib_qp *qp)
{
struct ib_xrcd *xrcd;
struct ib_qp *real_qp;
int ret;
/* Capture both pointers up front; ib_close_qp() frees qp. */
real_qp = qp->real_qp;
xrcd = real_qp->xrcd;
mutex_lock(&xrcd->tgt_qp_mutex);
ib_close_qp(qp);
if (atomic_read(&real_qp->usecnt) == 0)
list_del(&real_qp->xrcd_list);
else
real_qp = NULL; /* still referenced: nothing more to destroy */
mutex_unlock(&xrcd->tgt_qp_mutex);
if (real_qp) {
ret = ib_destroy_qp(real_qp);
if (!ret)
atomic_dec(&xrcd->usecnt);
else
/* Destroy failed: hand the real QP back to the XRC domain. */
__ib_insert_xrcd_qp(xrcd, real_qp);
}
return 0;
}
int ib_destroy_qp(struct ib_qp *qp)
{
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
struct ib_xrcd *xrcd;
enum ib_qp_type qp_type = qp->qp_type;
int ret;
if (atomic_read(&qp->usecnt))
return -EBUSY;
if (qp->real_qp != qp)
return __ib_destroy_shared_qp(qp);
pd = qp->pd;
scq = qp->send_cq;
rcq = qp->recv_cq;
srq = qp->srq;
xrcd = qp->xrcd;
ret = qp->device->destroy_qp(qp);
if (!ret) {
if (pd)
atomic_dec(&pd->usecnt);
if (scq)
atomic_dec(&scq->usecnt);
if (rcq)
atomic_dec(&rcq->usecnt);
if (srq)
atomic_dec(&srq->usecnt);
if (qp_type == IB_QPT_XRC)
atomic_dec(&xrcd->usecnt);
}
return ret;
@ -726,8 +1019,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
void *cq_context, int cqe, int comp_vector)
{
struct ib_cq *cq;
struct ib_cq_init_attr attr = {
.cqe = cqe,
.comp_vector = comp_vector,
.flags = 0,
};
cq = device->create_cq(device, cqe, comp_vector, NULL, NULL);
cq = device->create_cq(device, &attr, NULL, NULL);
if (!IS_ERR(cq)) {
cq->device = device;
@ -742,10 +1040,12 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
}
EXPORT_SYMBOL(ib_create_cq);
int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
int ib_modify_cq(struct ib_cq *cq,
struct ib_cq_attr *cq_attr,
int cq_attr_mask)
{
return cq->device->modify_cq ?
cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
cq->device->modify_cq(cq, cq_attr, cq_attr_mask) : -ENOSYS;
}
EXPORT_SYMBOL(ib_modify_cq);
@ -770,6 +1070,11 @@ EXPORT_SYMBOL(ib_resize_cq);
struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
{
struct ib_mr *mr;
int err;
err = ib_check_mr_access(mr_access_flags);
if (err)
return ERR_PTR(err);
mr = pd->device->get_dma_mr(pd, mr_access_flags);
@ -792,6 +1097,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,
u64 *iova_start)
{
struct ib_mr *mr;
int err;
err = ib_check_mr_access(mr_access_flags);
if (err)
return ERR_PTR(err);
if (!pd->device->reg_phys_mr)
return ERR_PTR(-ENOSYS);
@ -822,6 +1132,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,
struct ib_pd *old_pd;
int ret;
ret = ib_check_mr_access(mr_access_flags);
if (ret)
return ret;
if (!mr->device->rereg_phys_mr)
return -ENOSYS;
@ -867,6 +1181,45 @@ int ib_dereg_mr(struct ib_mr *mr)
}
EXPORT_SYMBOL(ib_dereg_mr);
/*
 * Create a memory region on @pd through the device's create_mr method.
 *
 * Returns ERR_PTR(-ENOSYS) when the device has no create_mr method, or the
 * device's own error pointer on failure. On success the common fields of the
 * new MR are initialised (device, pd, no uobject, usecnt 0) and a reference
 * is taken on @pd.
 */
struct ib_mr *ib_create_mr(struct ib_pd *pd,
			   struct ib_mr_init_attr *mr_init_attr)
{
	struct ib_mr *new_mr;

	if (pd->device->create_mr == NULL)
		return ERR_PTR(-ENOSYS);

	new_mr = pd->device->create_mr(pd, mr_init_attr);
	if (IS_ERR(new_mr))
		return new_mr;

	new_mr->device = pd->device;
	new_mr->pd = pd;
	new_mr->uobject = NULL;
	atomic_inc(&pd->usecnt);
	atomic_set(&new_mr->usecnt, 0);

	return new_mr;
}
EXPORT_SYMBOL(ib_create_mr);
int ib_destroy_mr(struct ib_mr *mr)
{
struct ib_pd *pd;
int ret;
if (atomic_read(&mr->usecnt))
return -EBUSY;
pd = mr->pd;
ret = mr->device->destroy_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_destroy_mr);
struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
{
struct ib_mr *mr;
@ -915,18 +1268,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list);
/* Memory windows */
struct ib_mw *ib_alloc_mw(struct ib_pd *pd)
struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct ib_mw *mw;
if (!pd->device->alloc_mw)
return ERR_PTR(-ENOSYS);
mw = pd->device->alloc_mw(pd);
mw = pd->device->alloc_mw(pd, type);
if (!IS_ERR(mw)) {
mw->device = pd->device;
mw->pd = pd;
mw->uobject = NULL;
mw->type = type;
atomic_inc(&pd->usecnt);
}
@ -1000,59 +1354,59 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
if (!qp->device->attach_mcast)
return -ENOSYS;
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
} else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
case RDMA_TRANSPORT_SCIF:
if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
return qp->device->attach_mcast(qp, gid, lid);
ret = qp->device->attach_mcast(qp, gid, lid);
if (!ret)
atomic_inc(&qp->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_attach_mcast);
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
if (!qp->device->detach_mcast)
return -ENOSYS;
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
} else if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
if ((gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) &&
qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
case RDMA_TRANSPORT_SCIF:
if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
return qp->device->detach_mcast(qp, gid, lid);
ret = qp->device->detach_mcast(qp, gid, lid);
if (!ret)
atomic_dec(&qp->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_detach_mcast);
int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
if (atomic_read(&xrcd->usecnt))
return -EBUSY;
return xrcd->device->dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
{
struct ib_xrcd *xrcd;
@ -1064,10 +1418,119 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
if (!IS_ERR(xrcd)) {
xrcd->device = device;
xrcd->inode = NULL;
xrcd->uobject = NULL;
atomic_set(&xrcd->usecnt, 0);
mutex_init(&xrcd->tgt_qp_mutex);
INIT_LIST_HEAD(&xrcd->tgt_qp_list);
}
return xrcd;
}
EXPORT_SYMBOL(ib_alloc_xrcd);
/*
 * Release an XRC domain.
 *
 * Returns -EBUSY while xrcd->usecnt is non-zero. Otherwise every QP still
 * linked on tgt_qp_list is destroyed in turn; the first failure aborts the
 * walk and is returned. Once the list is empty the device's dealloc_xrcd
 * method is called and its result returned.
 */
int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
	int err;

	if (atomic_read(&xrcd->usecnt) != 0)
		return -EBUSY;

	for (;;) {
		struct ib_qp *tgt;

		if (list_empty(&xrcd->tgt_qp_list))
			break;

		/* Always take the current head of the list. */
		tgt = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
		err = ib_destroy_qp(tgt);
		if (err)
			return err;
	}

	return xrcd->device->dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
struct ib_flow *ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
int domain)
{
struct ib_flow *flow_id;
if (!qp->device->create_flow)
return ERR_PTR(-ENOSYS);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
if (!IS_ERR(flow_id))
atomic_inc(&qp->usecnt);
return flow_id;
}
EXPORT_SYMBOL(ib_create_flow);
int ib_destroy_flow(struct ib_flow *flow_id)
{
int err;
struct ib_qp *qp;
if (!flow_id)
return -EINVAL;
qp = flow_id->qp;
if (!qp->device->destroy_flow)
return -ENOSYS;
err = qp->device->destroy_flow(flow_id);
if (!err)
atomic_dec(&qp->usecnt);
return err;
}
EXPORT_SYMBOL(ib_destroy_flow);
/*
 * Create a DCT on @pd through the device's exp_create_dct method.
 *
 * Returns ERR_PTR(-ENOSYS) when the device has no exp_create_dct method, or
 * the device's error pointer on failure. On success the common fields of the
 * DCT (device, pd, srq, cq) are filled in and a reference is taken on the
 * SRQ, the CQ and the PD named by @attr / @pd.
 */
struct ib_dct *ib_create_dct(struct ib_pd *pd, struct ib_dct_init_attr *attr,
			     struct ib_udata *udata)
{
	struct ib_dct *dct;

	if (!pd->device->exp_create_dct)
		return ERR_PTR(-ENOSYS);

	dct = pd->device->exp_create_dct(pd, attr, udata);
	if (!IS_ERR(dct)) {
		/*
		 * ib_destroy_dct() and ib_query_dct() dispatch through
		 * dct->device, so set it here rather than relying on each
		 * driver to do so.
		 */
		dct->device = pd->device;
		dct->pd = pd;
		dct->srq = attr->srq;
		dct->cq = attr->cq;
		atomic_inc(&dct->srq->usecnt);
		atomic_inc(&dct->cq->usecnt);
		atomic_inc(&dct->pd->usecnt);
	}

	return dct;
}
EXPORT_SYMBOL(ib_create_dct);
int ib_destroy_dct(struct ib_dct *dct)
{
int err;
if (!dct->device->exp_destroy_dct)
return -ENOSYS;
err = dct->device->exp_destroy_dct(dct);
if (!err) {
atomic_dec(&dct->srq->usecnt);
atomic_dec(&dct->cq->usecnt);
atomic_dec(&dct->pd->usecnt);
}
return err;
}
EXPORT_SYMBOL(ib_destroy_dct);
/*
 * Query a DCT's attributes via the device's exp_query_dct method.
 * Returns -ENOSYS when the device does not provide that method.
 */
int ib_query_dct(struct ib_dct *dct, struct ib_dct_attr *attr)
{
	return dct->device->exp_query_dct ?
		dct->device->exp_query_dct(dct, attr) : -ENOSYS;
}
EXPORT_SYMBOL(ib_query_dct);
/*
 * Ask the device to report the status of @mr for the checks selected by
 * @check_mask, storing the outcome in @mr_status.
 * Returns -ENOSYS when the device has no check_mr_status method.
 */
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
		       struct ib_mr_status *mr_status)
{
	if (!mr->device->check_mr_status)
		return -ENOSYS;

	return mr->device->check_mr_status(mr, check_mask, mr_status);
}
EXPORT_SYMBOL(ib_check_mr_status);

File diff suppressed because it is too large Load Diff

View File

@ -22,24 +22,85 @@
#ifndef H_MEMTRACK_H
#define H_MEMTRACK_H
typedef enum {
enum memtrack_memtype_t {
MEMTRACK_KMALLOC,
MEMTRACK_VMALLOC,
MEMTRACK_KMEM_OBJ,
MEMTRACK_IOREMAP, /* IO-RE/UN-MAP */
MEMTRACK_WORK_QUEUE, /* Handle work-queue create & destroy */
MEMTRACK_PAGE_ALLOC, /* Handle page allocation and free */
MEMTRACK_DMA_MAP_SINGLE,/* Handle ib_dma_single map and unmap */
MEMTRACK_DMA_MAP_PAGE, /* Handle ib_dma_page map and unmap */
MEMTRACK_DMA_MAP_SG, /* Handle ib_dma_sg map and unmap with and without attributes */
MEMTRACK_NUM_OF_MEMTYPES
} memtrack_memtype_t;
};
/* Invoke on memory allocation */
void memtrack_alloc(memtrack_memtype_t memtype, unsigned long addr,
unsigned long size, const char *filename,
void memtrack_alloc(enum memtrack_memtype_t memtype, unsigned long dev,
unsigned long addr, unsigned long size, unsigned long addr2,
int direction, const char *filename,
const unsigned long line_num, int alloc_flags);
/* Invoke on memory free */
void memtrack_free(memtrack_memtype_t memtype, unsigned long addr,
void memtrack_free(enum memtrack_memtype_t memtype, unsigned long dev,
unsigned long addr, unsigned long size, int direction,
const char *filename, const unsigned long line_num);
/*
* This function recognizes allocations which
* may be released by kernel (e.g. skb & vnic) and
* therefore not trackable by memtrack.
* The allocations are recognized by the name
* of their calling function.
*/
int is_non_trackable_alloc_func(const char *func_name);
/*
* In some cases we need to free a memory
* we defined as "non trackable" (see
* is_non_trackable_alloc_func).
* This function recognizes such releases
* by the name of their calling function.
*/
int is_non_trackable_free_func(const char *func_name);
/* WA - This function checks whether
the function name is
'__ib_umem_release' or 'ib_umem_get'.
In that case we won't track the
memory there because the kernel
was the one who allocated it.
Return value:
1 - if the function name matches, else 0 */
int is_umem_put_page(const char *func_name);
/* Check page order size.
When freeing a page allocation it checks whether
we are trying to free the same amount of pages
we asked to allocate (in log2(order)).
In case an error is found it will print
an error msg */
int memtrack_check_size(enum memtrack_memtype_t memtype, unsigned long addr,
unsigned long size, const char *filename,
const unsigned long line_num);
/* Check whether a specific addr exists in the
current data-base.
If not, it will print an error msg.
Return value: 0 - if addr exists, else 1 */
int memtrack_is_new_addr(enum memtrack_memtype_t memtype, unsigned long addr, int expect_exist,
const char *filename, const unsigned long line_num);
/* Return current page reference counter */
int memtrack_get_page_ref_count(unsigned long addr);
/* Report current allocations status (for all memory types) */
/* we do not export this function since it is used by cleanup_module only */
/* void memtrack_report(void); */
/* Allow support of error injections */
int memtrack_inject_error(void);
/* randomize allocated memory */
int memtrack_randomize_mem(void);
#endif

View File

@ -1,46 +1,84 @@
#ifndef __mtrack_h_
#define __mtrack_h_
#include <memtrack.h>
#include "memtrack.h"
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/io.h> /* For ioremap_nocache, ioremap, iounmap */
#include <linux/random.h>
#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 27)
# include <linux/io-mapping.h> /* For ioremap_nocache, ioremap, iounmap */
#endif
#include <linux/mm.h> /* For all page handling */
#include <linux/workqueue.h> /* For all work-queue handling */
#include <linux/scatterlist.h> /* For using scatterlists */
#include <linux/skbuff.h> /* For skbufs handling */
#include <asm/uaccess.h> /* For copy from/to user */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
/*
 * Report an injected failure: prints, at KERN_ERR, the name of the wrapped
 * call (func) and the call site (file:line), then dumps the current stack.
 */
#define MEMTRACK_ERROR_INJECTION_MESSAGE(file, line, func) ({ \
printk(KERN_ERR "%s failure injected at %s:%d\n", func, file, line); \
dump_stack(); \
})
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14)
#define RDMA_KZALLOC_H
#define kzalloc(size, flags) ({ \
void *__memtrack_kz_addr; \
void *__memtrack_kz_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc");\
else \
__memtrack_kz_addr = kmalloc(size, flags); \
if ( __memtrack_kz_addr ) { \
memset( __memtrack_kz_addr, 0, size) ; \
if (__memtrack_kz_addr && !is_non_trackable_alloc_func(__func__)) { \
memset(__memtrack_kz_addr, 0, size); \
} \
__memtrack_kz_addr; \
})
#else
#define kzalloc(size, flags) ({ \
void *__memtrack_addr; \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc");\
else \
__memtrack_addr = kzalloc(size, flags); \
if ( __memtrack_addr && (size)) { \
memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), size, __FILE__, __LINE__, flags); \
if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, flags); \
} \
__memtrack_addr; \
})
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
/*
 * Tracked kzalloc_node(): when memtrack_inject_error() fires, the real
 * allocator is skipped, the injection is logged and NULL is returned.
 * Otherwise the real kzalloc_node() is called and a non-NULL result with a
 * non-zero size is recorded as MEMTRACK_KMALLOC, unless the calling function
 * is on the non-trackable allocation list.
 */
#define kzalloc_node(size, flags, node) ({ \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kzalloc_node"); \
else \
__memtrack_addr = kzalloc_node(size, flags, node); \
if (__memtrack_addr && (size) && \
!is_non_trackable_alloc_func(__func__)) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, flags); \
} \
__memtrack_addr; \
})
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
#define kcalloc(n, size, flags) kzalloc((n)*(size), flags)
#else
#define kcalloc(n, size, flags) ({ \
void *__memtrack_addr; \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kcalloc");\
else \
__memtrack_addr = kcalloc(n, size, flags); \
if ( __memtrack_addr && (size)) { \
memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), (n)*(size), __FILE__, __LINE__, flags); \
if (__memtrack_addr && (size)) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), (n)*(size), 0UL, 0, __FILE__, __LINE__, flags); \
} \
__memtrack_addr; \
})
@ -50,76 +88,208 @@
#ifdef ZERO_OR_NULL_PTR
#define kmalloc(sz, flgs) ({ \
void *__memtrack_addr; \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc");\
else \
__memtrack_addr = kmalloc(sz, flgs); \
if ( !ZERO_OR_NULL_PTR(__memtrack_addr)) { \
memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), sz, __FILE__, __LINE__, flgs); \
if (!ZERO_OR_NULL_PTR(__memtrack_addr)) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
if (memtrack_randomize_mem()) \
get_random_bytes(__memtrack_addr, sz); \
} \
__memtrack_addr; \
})
#else
#define kmalloc(sz, flgs) ({ \
void *__memtrack_addr; \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc");\
else \
__memtrack_addr = kmalloc(sz, flgs); \
if ( __memtrack_addr ) { \
memtrack_alloc(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), sz, __FILE__, __LINE__, flgs); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
if (memtrack_randomize_mem()) \
get_random_bytes(__memtrack_addr, sz); \
} \
__memtrack_addr; \
})
#endif
/*
 * Tracked kmalloc_node(): supports failure injection (returns NULL without
 * calling the allocator) and records a successful allocation as
 * MEMTRACK_KMALLOC. When memtrack_randomize_mem() is enabled the new buffer
 * is filled with random bytes, but only if the flags are exactly GFP_KERNEL.
 */
#define kmalloc_node(sz, flgs, node) ({ \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmalloc_node"); \
else \
__memtrack_addr = kmalloc_node(sz, flgs, node); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
if (memtrack_randomize_mem() && ((flgs) == GFP_KERNEL)) \
get_random_bytes(__memtrack_addr, sz); \
} \
__memtrack_addr; \
})
/*
 * Tracked kmemdup(): supports failure injection (returns NULL without
 * calling kmemdup) and records a successful duplicate as MEMTRACK_KMALLOC.
 * The two variants differ only in how "nothing allocated" is detected:
 * ZERO_OR_NULL_PTR() when that macro is available, a plain NULL test
 * otherwise.
 */
#ifdef ZERO_OR_NULL_PTR
#define kmemdup(src, sz, flgs) ({ \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmemdup");\
else \
__memtrack_addr = kmemdup(src, sz, flgs); \
if (!ZERO_OR_NULL_PTR(__memtrack_addr)) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
} \
__memtrack_addr; \
})
#else
#define kmemdup(src, sz, flgs) ({ \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmemdup");\
else \
__memtrack_addr = kmemdup(src, sz, flgs); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), sz, 0UL, 0, __FILE__, __LINE__, flgs); \
} \
__memtrack_addr; \
})
#endif
#ifdef ZERO_OR_NULL_PTR
#define kfree(addr) ({ \
void *__memtrack_addr = (void *)addr; \
if ( !ZERO_OR_NULL_PTR(__memtrack_addr) ) { \
memtrack_free(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
\
if (!ZERO_OR_NULL_PTR(__memtrack_addr) && \
!is_non_trackable_free_func(__func__)) { \
memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
kfree(__memtrack_addr); \
})
#else
#define kfree(addr) ({ \
void *__memtrack_addr = (void *)addr; \
if ( __memtrack_addr ) { \
memtrack_free(MEMTRACK_KMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
\
if (__memtrack_addr && !is_non_trackable_free_func(__func__)) { \
memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
kfree(__memtrack_addr); \
})
#endif
/*
 * Tracked kfree_rcu(), defined for kernels >= 3.0.0 or when
 * CONFIG_COMPAT_RCU is set. Any existing kfree_rcu macro is replaced.
 * The tracking record is removed at the time of the call (unless the
 * calling function is on the non-trackable free list), and the deferred
 * free is then queued with __kfree_rcu().
 * NOTE(review): addr is expanded more than once and is cast without
 * parentheses - callers should pass a plain pointer variable.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0) || defined (CONFIG_COMPAT_RCU)
#ifdef kfree_rcu
#undef kfree_rcu
#endif
#ifdef ZERO_OR_NULL_PTR
#define kfree_rcu(addr, rcu_head) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (!ZERO_OR_NULL_PTR(__memtrack_addr) && \
!is_non_trackable_free_func(__func__)) { \
memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
__kfree_rcu(&((addr)->rcu_head), offsetof(typeof(*(addr)), rcu_head)); \
})
#else
#define kfree_rcu(addr, rcu_head) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (__memtrack_addr && !is_non_trackable_free_func(__func__)) { \
memtrack_free(MEMTRACK_KMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
__kfree_rcu(&((addr)->rcu_head), offsetof(typeof(*(addr)), rcu_head)); \
})
#endif
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) */
#define vmalloc(size) ({ \
void *__memtrack_addr; \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vmalloc");\
else \
__memtrack_addr = vmalloc(size); \
if ( __memtrack_addr ) { \
memtrack_alloc(MEMTRACK_VMALLOC, (unsigned long)(__memtrack_addr), size, __FILE__, __LINE__, GFP_ATOMIC); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
if (memtrack_randomize_mem()) \
get_random_bytes(__memtrack_addr, size); \
} \
__memtrack_addr; \
})
/*
 * Tracked vzalloc(), defined only when vzalloc is not already a macro.
 * Supports failure injection (returns NULL without calling vzalloc) and
 * records a successful allocation as MEMTRACK_VMALLOC. The buffer is never
 * randomized here.
 */
#ifndef vzalloc
#define vzalloc(size) ({ \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vzalloc");\
else \
__memtrack_addr = vzalloc(size); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_addr; \
})
#endif
#ifndef vzalloc_node
/*
 * Tracked vzalloc_node(): supports failure injection (returns NULL without
 * calling vzalloc_node) and records a successful allocation as
 * MEMTRACK_VMALLOC.
 *
 * Unlike the vmalloc()/vmalloc_node() wrappers, the buffer is NOT filled
 * with random bytes when memtrack_randomize_mem() is enabled: callers of a
 * zeroing allocator rely on receiving zeroed memory, and the vzalloc()
 * wrapper behaves the same way.
 */
#define vzalloc_node(size, node) ({ \
	void *__memtrack_addr = NULL; \
	\
	if (memtrack_inject_error()) \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vzalloc_node"); \
	else \
		__memtrack_addr = vzalloc_node(size, node); \
	if (__memtrack_addr) { \
		memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	} \
	__memtrack_addr; \
})
#endif
/*
 * Tracked vmalloc_node(): supports failure injection (returns NULL without
 * calling vmalloc_node), records a successful allocation as
 * MEMTRACK_VMALLOC and, when memtrack_randomize_mem() is enabled, fills the
 * new buffer with random bytes.
 */
#define vmalloc_node(size, node) ({ \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "vmalloc_node"); \
else \
__memtrack_addr = vmalloc_node(size, node); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
if (memtrack_randomize_mem()) \
get_random_bytes(__memtrack_addr, size); \
} \
__memtrack_addr; \
})
#define vfree(addr) ({ \
void *__memtrack_addr = (void *)addr; \
if ( __memtrack_addr ) { \
memtrack_free(MEMTRACK_VMALLOC, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
if (__memtrack_addr) { \
memtrack_free(MEMTRACK_VMALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
vfree(__memtrack_addr); \
})
#define kmem_cache_alloc(cache, flags) ({ \
void *__memtrack_addr; \
void *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "kmem_cache_alloc"); \
else \
__memtrack_addr = kmem_cache_alloc(cache, flags); \
if ( __memtrack_addr ) { \
memtrack_alloc(MEMTRACK_KMEM_OBJ, (unsigned long)(__memtrack_addr), 1, __FILE__, __LINE__, flags); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_KMEM_OBJ, 0UL, (unsigned long)(__memtrack_addr), 1, 0UL, 0, __FILE__, __LINE__, flags); \
} \
__memtrack_addr; \
})
@ -127,12 +297,548 @@
#define kmem_cache_free(cache, addr) ({ \
void *__memtrack_addr = (void *)addr; \
if ( __memtrack_addr ) { \
memtrack_free(MEMTRACK_KMEM_OBJ, (unsigned long)(__memtrack_addr), __FILE__, __LINE__); \
\
if (__memtrack_addr) { \
memtrack_free(MEMTRACK_KMEM_OBJ, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
kmem_cache_free(cache, __memtrack_addr); \
})
/* All IO-MAP handling */
/*
 * Tracked ioremap(): supports failure injection (returns NULL without
 * mapping) and records a successful mapping as MEMTRACK_IOREMAP.
 */
#define ioremap(phys_addr, size) ({ \
void __iomem *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap");\
else \
__memtrack_addr = ioremap(phys_addr, size); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_addr; \
})
/*
 * Tracked io_mapping_create_wc(): supports failure injection (returns NULL
 * without mapping) and records a successful mapping as MEMTRACK_IOREMAP.
 * NOTE(review): the result is carried in a void __iomem * local - confirm
 * this matches the return type callers expect from io_mapping_create_wc().
 */
#define io_mapping_create_wc(base, size) ({ \
void __iomem *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "io_mapping_create_wc"); \
else \
__memtrack_addr = io_mapping_create_wc(base, size); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_addr; \
})
/*
 * Tracked io_mapping_free(): drops the MEMTRACK_IOREMAP record for a
 * non-NULL mapping, then calls the real io_mapping_free().
 */
#define io_mapping_free(addr) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (__memtrack_addr) { \
memtrack_free(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
io_mapping_free(__memtrack_addr); \
})
/*
 * Tracked ioremap_nocache(), in three build variants. All support failure
 * injection and record a successful mapping as MEMTRACK_IOREMAP; they differ
 * in the function that performs the mapping:
 *  - CONFIG_PPC:            replaces any existing macro and maps with ioremap()
 *  - kernels before 2.6.18: replaces any existing macro and maps with ioremap()
 *  - otherwise:             maps with the real ioremap_nocache()
 */
#ifdef CONFIG_PPC
#ifdef ioremap_nocache
#undef ioremap_nocache
#endif
#define ioremap_nocache(phys_addr, size) ({ \
void __iomem *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \
else \
__memtrack_addr = ioremap(phys_addr, size); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_addr; \
})
#else
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 18) /* 2.6.16 - 2.6.17 */
#ifdef ioremap_nocache
#undef ioremap_nocache
#endif
#define ioremap_nocache(phys_addr, size) ({ \
void __iomem *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \
else \
__memtrack_addr = ioremap(phys_addr, size); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_addr; \
})
#else
#define ioremap_nocache(phys_addr, size) ({ \
void __iomem *__memtrack_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "ioremap_nocache"); \
else \
__memtrack_addr = ioremap_nocache(phys_addr, size); \
if (__memtrack_addr) { \
memtrack_alloc(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), size, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_addr; \
})
#endif /* Kernel version is under 2.6.18 */
#endif /* PPC */
/*
 * Tracked iounmap(): drops the MEMTRACK_IOREMAP record for a non-NULL
 * mapping, then calls the real iounmap().
 */
#define iounmap(addr) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (__memtrack_addr) { \
memtrack_free(MEMTRACK_IOREMAP, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
iounmap(__memtrack_addr); \
})
/* All Page handlers */
/* TODO: Catch netif_rx for page dereference */
/*
 * Tracked alloc_pages_node(): supports failure injection (returns NULL
 * without allocating). A successful allocation is recorded as
 * MEMTRACK_PAGE_ALLOC, with the page order stored in the size field, unless
 * the calling function is on the non-trackable allocation list.
 */
#define alloc_pages_node(nid, gfp_mask, order) ({ \
struct page *page_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_pages_node"); \
else \
page_addr = (struct page *)alloc_pages_node(nid, gfp_mask, order); \
if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
page_addr; \
})
/*
 * Tracked alloc_pages().
 * With CONFIG_NUMA the real alloc_pages() is wrapped directly: failure
 * injection is supported and a successful allocation is recorded as
 * MEMTRACK_PAGE_ALLOC (page order in the size field) unless the caller is
 * non-trackable.
 * Without CONFIG_NUMA any existing alloc_pages macro is replaced by a call
 * to alloc_pages_node() on numa_node_id(); this variant performs no
 * injection or tracking of its own.
 */
#ifdef CONFIG_NUMA
#define alloc_pages(gfp_mask, order) ({ \
struct page *page_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_pages"); \
else \
page_addr = (struct page *)alloc_pages(gfp_mask, order); \
if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
page_addr; \
})
#else
#ifdef alloc_pages
#undef alloc_pages
#endif
#define alloc_pages(gfp_mask, order) ({ \
struct page *page_addr; \
\
page_addr = (struct page *)alloc_pages_node(numa_node_id(), gfp_mask, order); \
page_addr; \
})
#endif
/*
 * Tracked __get_free_pages(): supports failure injection (returns NULL
 * without allocating) and records a successful allocation as
 * MEMTRACK_PAGE_ALLOC (page order in the size field) unless the caller is
 * non-trackable.
 * NOTE(review): the macro evaluates to a struct page * holding the value
 * returned by __get_free_pages(), whereas the get_zeroed_page() wrapper
 * casts its result back to unsigned long - confirm callers cope with the
 * pointer type.
 */
#define __get_free_pages(gfp_mask, order) ({ \
struct page *page_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "__get_free_pages"); \
else \
page_addr = (struct page *)__get_free_pages(gfp_mask, order); \
if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), (unsigned long)(order), 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
page_addr; \
})
/*
 * Tracked get_zeroed_page(): supports failure injection (evaluates to 0
 * without allocating) and records a successful allocation as
 * MEMTRACK_PAGE_ALLOC with size 0 unless the caller is non-trackable.
 * The result is cast back to unsigned long.
 */
#define get_zeroed_page(gfp_mask) ({ \
struct page *page_addr = NULL; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "get_zeroed_page"); \
else \
page_addr = (struct page *)get_zeroed_page(gfp_mask); \
if (page_addr && !is_non_trackable_alloc_func(__func__)) { \
memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(page_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
(unsigned long)page_addr; \
})
/*
 * Tracked __free_pages(): for a non-NULL address from a trackable caller,
 * the recorded order is checked against @order with memtrack_check_size()
 * and the MEMTRACK_PAGE_ALLOC record is dropped only when that check
 * returns 0. The real __free_pages() is called in every case.
 * NOTE(review): the caller filter used here is
 * is_non_trackable_alloc_func(), not the free-side variant - confirm this
 * is intended.
 */
#define __free_pages(addr, order) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
if (!memtrack_check_size(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), (unsigned long)(order), __FILE__, __LINE__)) \
memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
__free_pages(addr, order); \
})
/*
 * Tracked free_pages(): for a non-zero address from a trackable caller, the
 * recorded order is checked against @order with memtrack_check_size() and
 * the MEMTRACK_PAGE_ALLOC record is dropped only when that check returns 0.
 * The real free_pages() is called in every case.
 */
#define free_pages(addr, order) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
if (!memtrack_check_size(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), (unsigned long)(order), __FILE__, __LINE__)) \
memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
free_pages(addr, order); \
})
/*
 * Tracked get_page(): for a non-NULL page from a trackable caller, a
 * MEMTRACK_PAGE_ALLOC record (size 0) is created when
 * memtrack_is_new_addr() reports the page as not yet known. The real
 * get_page() is called in every case.
 */
#define get_page(addr) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
if (memtrack_is_new_addr(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), 0, __FILE__, __LINE__)) { \
memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
} \
get_page(addr); \
})
/*
 * Tracked get_user_pages_fast(): with failure injection the real call is
 * skipped and -1 is returned. Otherwise the real call's result is returned
 * and, when it is positive and the caller is trackable, each of the first
 * <result> entries of @pages is recorded as MEMTRACK_PAGE_ALLOC (size 0).
 */
#define get_user_pages_fast(start, nr_pages, write, pages) ({ \
int __memtrack_rc = -1; \
\
if (memtrack_inject_error()) \
MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "get_user_pages_fast"); \
else \
__memtrack_rc = get_user_pages_fast(start, nr_pages, write, pages); \
if (__memtrack_rc > 0 && !is_non_trackable_alloc_func(__func__)) { \
int __memtrack_i; \
\
for (__memtrack_i = 0; __memtrack_i < __memtrack_rc; __memtrack_i++) \
memtrack_alloc(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(pages[__memtrack_i]), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
} \
__memtrack_rc; \
})
/*
 * Tracked put_page(): for a non-NULL page from a trackable caller, the
 * MEMTRACK_PAGE_ALLOC record is dropped only when all three conditions
 * hold, evaluated in this order: the caller is not a umem put-page path,
 * the page is already known to memtrack, and
 * memtrack_get_page_ref_count() reports 1. The real put_page() is called
 * in every case.
 */
#define put_page(addr) ({ \
void *__memtrack_addr = (void *)addr; \
\
if (__memtrack_addr && !is_non_trackable_alloc_func(__func__)) { \
/* Check whether this is not part of umem put page & not */\
/* a new addr and the ref-count is 1 then we'll free this addr */\
/* Don't change the order these conditions */ \
if (!is_umem_put_page(__func__) && \
!memtrack_is_new_addr(MEMTRACK_PAGE_ALLOC, (unsigned long)(__memtrack_addr), 1, __FILE__, __LINE__) && \
(memtrack_get_page_ref_count((unsigned long)(__memtrack_addr)) == 1)) { \
memtrack_free(MEMTRACK_PAGE_ALLOC, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
} \
} \
put_page(addr); \
})
/* Work-Queue handlers */
/*
 * Drop any existing macro definitions of the workqueue constructors so the
 * tracking versions defined further down can be installed under the same
 * names.
 */
#ifdef create_workqueue
#undef create_workqueue
#endif
#ifdef create_rt_workqueue
#undef create_rt_workqueue
#endif
#ifdef create_freezeable_workqueue
#undef create_freezeable_workqueue
#endif
#ifdef create_singlethread_workqueue
#undef create_singlethread_workqueue
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) /* 2.6.18 - 2.6.19 */
/*
 * create_workqueue() for kernels 2.6.18 - 2.6.19.
 *
 * Forwards to __create_workqueue((name), 0) unless an error is injected, in
 * which case a message is emitted and the macro evaluates to NULL.  A
 * non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 0); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
/*
 * create_singlethread_workqueue() for kernels 2.6.18 - 2.6.19.
 *
 * Forwards to __create_workqueue((name), 1) unless an error is injected, in
 * which case a message is emitted and the macro evaluates to NULL.  A
 * non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_singlethread_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 1); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28) /* 2.6.20 - 2.6.27 */
/*
 * create_workqueue() for kernels 2.6.20 - 2.6.27.
 *
 * Forwards to __create_workqueue((name), 0, 0) unless an error is injected,
 * in which case a message is emitted and the macro evaluates to NULL.  A
 * non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 0, 0); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 22) /* 2.6.20 - 2.6.21 */
/*
 * create_freezeable_workqueue() for kernels 2.6.20 - 2.6.21.
 *
 * Forwards to __create_workqueue((name), 0, 1) unless an error is injected,
 * in which case a message is emitted and the macro evaluates to NULL.  A
 * non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_freezeable_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 0, 1); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#else /* 2.6.22 - 2.6.27 */
/*
 * create_freezeable_workqueue() for kernels 2.6.22 - 2.6.27.
 *
 * Forwards to __create_workqueue((name), 1, 1) unless an error is injected,
 * in which case a message is emitted and the macro evaluates to NULL.  A
 * non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_freezeable_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 1, 1); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#endif /* 2.6.20 - 2.6.27 */
/*
 * create_singlethread_workqueue() for kernels 2.6.20 - 2.6.27.
 *
 * Forwards to __create_workqueue((name), 1, 0) unless an error is injected,
 * in which case a message is emitted and the macro evaluates to NULL.  A
 * non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_singlethread_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 1, 0); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) /* 2.6.28 - 2.6.35 */
#ifdef alloc_workqueue
#undef alloc_workqueue
#endif
/*
 * alloc_workqueue() for kernels 2.6.28 - 2.6.35.
 *
 * Forwards to __create_workqueue((name), (flags), (max_active), 0) unless an
 * error is injected, in which case a message is emitted and the macro
 * evaluates to NULL.  A non-NULL result is recorded under
 * MEMTRACK_WORK_QUEUE.
 */
#define alloc_workqueue(name, flags, max_active) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), (flags), (max_active), 0); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
/*
 * create_workqueue() for kernels 2.6.28 - 2.6.35.
 *
 * Forwards to __create_workqueue((name), 0, 0, 0) unless an error is
 * injected, in which case a message is emitted and the macro evaluates to
 * NULL.  A non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 0, 0, 0); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
/*
 * create_rt_workqueue() for kernels 2.6.28 - 2.6.35.
 *
 * Forwards to __create_workqueue((name), 0, 0, 1) unless an error is
 * injected, in which case a message is emitted and the macro evaluates to
 * NULL.  A non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_rt_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_rt_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 0, 0, 1); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
/*
 * create_freezeable_workqueue() for kernels 2.6.28 - 2.6.35.
 *
 * Forwards to __create_workqueue((name), 1, 1, 0) unless an error is
 * injected, in which case a message is emitted and the macro evaluates to
 * NULL.  A non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_freezeable_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_freezeable_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 1, 1, 0); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
/*
 * create_singlethread_workqueue() for kernels 2.6.28 - 2.6.35.
 *
 * Forwards to __create_workqueue((name), 1, 0, 0) unless an error is
 * injected, in which case a message is emitted and the macro evaluates to
 * NULL.  A non-NULL result is recorded under MEMTRACK_WORK_QUEUE.
 */
#define create_singlethread_workqueue(name) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "create_singlethread_workqueue"); \
	} else { \
		__memtrack_wq = __create_workqueue((name), 1, 0, 0); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#else /* 2.6.36 */
#ifdef alloc_workqueue
#undef alloc_workqueue
#endif
#ifdef CONFIG_LOCKDEP
/*
 * alloc_workqueue() for kernels 2.6.36 and later, CONFIG_LOCKDEP variant.
 *
 * Picks the lock name (the name argument itself when it is a compile-time
 * constant, otherwise its stringified spelling) and forwards to
 * __alloc_workqueue_key() with a per-call-site static lock class key.  If an
 * error is injected the real call is skipped, a message is emitted and the
 * macro evaluates to NULL.  A non-NULL result is recorded under
 * MEMTRACK_WORK_QUEUE.
 */
#define alloc_workqueue(name, flags, max_active) \
({ \
	static struct lock_class_key __key; \
	const char *__lock_name; \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (__builtin_constant_p(name)) { \
		__lock_name = (name); \
	} else { \
		__lock_name = #name; \
	} \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \
	} else { \
		__memtrack_wq = __alloc_workqueue_key((name), (flags), \
		    (max_active), &__key, __lock_name); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#else
/*
 * alloc_workqueue() for kernels 2.6.36 and later, without CONFIG_LOCKDEP.
 *
 * Forwards to __alloc_workqueue_key((name), (flags), (max_active), NULL,
 * NULL) unless an error is injected, in which case a message is emitted and
 * the macro evaluates to NULL.  A non-NULL result is recorded under
 * MEMTRACK_WORK_QUEUE.
 */
#define alloc_workqueue(name, flags, max_active) ({ \
	struct workqueue_struct *__memtrack_wq = NULL; \
\
	if (memtrack_inject_error()) { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_workqueue"); \
	} else { \
		__memtrack_wq = __alloc_workqueue_key((name), (flags), (max_active), NULL, NULL); \
	} \
	if (__memtrack_wq != NULL) \
		memtrack_alloc(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_wq), 0, 0UL, 0, __FILE__, __LINE__, GFP_ATOMIC); \
	__memtrack_wq; \
})
#endif
/*
 * Legacy workqueue constructors for kernels 2.6.36 and later, expressed in
 * terms of alloc_workqueue().
 *
 * The expansions deliberately carry no trailing semicolon: each macro must
 * be usable as an expression, e.g. "if (!(wq = create_workqueue(n)))" or
 * "return create_singlethread_workqueue(n);".
 */
#define create_workqueue(name) \
	alloc_workqueue((name), WQ_RESCUER, 1)
#define create_freezeable_workqueue(name) \
	alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1)
#define create_singlethread_workqueue(name) \
	alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1)
#endif /* Work-Queue Kernel Versions */
/*
 * Tracking wrapper around destroy_workqueue().
 *
 * A non-NULL wq_addr has its MEMTRACK_WORK_QUEUE record released with
 * memtrack_free(); the underlying destroy_workqueue() is then always
 * called.
 *
 * wq_addr is parenthesized under the cast so that an expression argument
 * such as "cond ? a : b" is converted as a whole.
 * NOTE: wq_addr is evaluated twice.
 */
#define destroy_workqueue(wq_addr) ({ \
	void *__memtrack_addr = (void *)(wq_addr); \
\
	if (__memtrack_addr) { \
		memtrack_free(MEMTRACK_WORK_QUEUE, 0UL, (unsigned long)(__memtrack_addr), 0UL, 0, __FILE__, __LINE__); \
	} \
	destroy_workqueue(wq_addr); \
})
/* Error injection ONLY, for functions whose allocations we don't monitor */
/*
 * Error-injecting wrapper around alloc_skb(); the result is not tracked.
 * On an injected error the real call is skipped, a message is emitted and
 * the macro evaluates to NULL.
 */
#define alloc_skb(size, prio) ({ \
	struct sk_buff *__memtrack_skb = NULL; \
\
	if (!memtrack_inject_error()) { \
		__memtrack_skb = alloc_skb(size, prio); \
	} else { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_skb"); \
	} \
	__memtrack_skb; \
})
/*
 * Error-injecting wrapper around dev_alloc_skb(); the result is not tracked.
 * On an injected error the real call is skipped, a message is emitted and
 * the macro evaluates to NULL.
 */
#define dev_alloc_skb(size) ({ \
	struct sk_buff *__memtrack_skb = NULL; \
\
	if (!memtrack_inject_error()) { \
		__memtrack_skb = dev_alloc_skb(size); \
	} else { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "dev_alloc_skb"); \
	} \
	__memtrack_skb; \
})
/*
 * Error-injecting wrapper around alloc_skb_fclone(); the result is not
 * tracked.  On an injected error the real call is skipped, a message is
 * emitted and the macro evaluates to NULL.
 */
#define alloc_skb_fclone(size, prio) ({ \
	struct sk_buff *__memtrack_skb = NULL; \
\
	if (!memtrack_inject_error()) { \
		__memtrack_skb = alloc_skb_fclone(size, prio); \
	} else { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "alloc_skb_fclone"); \
	} \
	__memtrack_skb; \
})
/*
 * Error-injecting wrapper around copy_from_user().
 *
 * On an injected error the real call is skipped, a message is emitted and
 * the macro evaluates to n converted to int (presumably mimicking "nothing
 * was copied" - confirm against the real function's contract).  Otherwise it
 * evaluates to the real call's result converted to int.
 *
 * The temporary uses a reserved-looking name so that it cannot capture a
 * caller variable: with a local called "ret", copy_from_user(dst, src, ret)
 * would expand to "int ret = ret;" and pass an indeterminate length on.
 * n is parenthesized in the initializer.
 * NOTE: n is evaluated twice when no error is injected.
 */
#define copy_from_user(to, from, n) ({ \
	int __memtrack_ret = (n); \
\
	if (memtrack_inject_error()) \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "copy_from_user"); \
	else \
		__memtrack_ret = copy_from_user(to, from, n); \
	__memtrack_ret; \
})
/*
 * Error-injecting wrapper around copy_to_user().
 *
 * On an injected error the real call is skipped, a message is emitted and
 * the macro evaluates to n converted to int (presumably mimicking "nothing
 * was copied" - confirm against the real function's contract).  Otherwise it
 * evaluates to the real call's result converted to int.
 *
 * The temporary uses a reserved-looking name so that it cannot capture a
 * caller variable: with a local called "ret", copy_to_user(dst, src, ret)
 * would expand to "int ret = ret;" and pass an indeterminate length on.
 * n is parenthesized in the initializer.
 * NOTE: n is evaluated twice when no error is injected.
 */
#define copy_to_user(to, from, n) ({ \
	int __memtrack_ret = (n); \
\
	if (memtrack_inject_error()) \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "copy_to_user"); \
	else \
		__memtrack_ret = copy_to_user(to, from, n); \
	__memtrack_ret; \
})
/*
 * Error-injecting wrapper around sysfs_create_file().
 * On an injected error the real call is skipped, a message is emitted and
 * the macro evaluates to -ENOSYS; otherwise it evaluates to the real call's
 * result.
 */
#define sysfs_create_file(kobj, attr) ({ \
	int __memtrack_rc = -ENOSYS; \
\
	if (!memtrack_inject_error()) { \
		__memtrack_rc = sysfs_create_file(kobj, attr); \
	} else { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_file"); \
	} \
	__memtrack_rc; \
})
/*
 * Error-injecting wrapper around sysfs_create_link().
 * On an injected error the real call is skipped, a message is emitted and
 * the macro evaluates to -ENOSYS; otherwise it evaluates to the real call's
 * result.
 */
#define sysfs_create_link(kobj, target, name) ({ \
	int __memtrack_rc = -ENOSYS; \
\
	if (!memtrack_inject_error()) { \
		__memtrack_rc = sysfs_create_link(kobj, target, name); \
	} else { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_link"); \
	} \
	__memtrack_rc; \
})
/*
 * Error-injecting wrapper around sysfs_create_group().
 * On an injected error the real call is skipped, a message is emitted and
 * the macro evaluates to -ENOSYS; otherwise it evaluates to the real call's
 * result.
 */
#define sysfs_create_group(kobj, grp) ({ \
	int __memtrack_rc = -ENOSYS; \
\
	if (!memtrack_inject_error()) { \
		__memtrack_rc = sysfs_create_group(kobj, grp); \
	} else { \
		MEMTRACK_ERROR_INJECTION_MESSAGE(__FILE__, __LINE__, "sysfs_create_group"); \
	} \
	__memtrack_rc; \
})
#endif /* __mtrack_h_ */

View File

@ -1,31 +0,0 @@
# $FreeBSD$
#.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
#.PATH: ${.CURDIR}/../../../../include/linux
.include <src.opts.mk>
KMOD = mlx4ib
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
#SRCS+= linux_compat.c linux_radix.c
SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
SRCS+= opt_inet.h opt_inet6.h
#CFLAGS+= -I${.CURDIR}/../../ofed/include/
CFLAGS+= -I${.CURDIR}/../../../../include
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
@echo "#define INET 1" > ${.TARGET}
.endif
.if ${MK_INET6_SUPPORT} != "no"
opt_inet6.h:
@echo "#define INET6 1" > ${.TARGET}
.endif
.endif
.include <bsd.kmod.mk>
CFLAGS+= -Wno-cast-qual -Wno-pointer-arith ${GCC_MS_EXTENSIONS}

View File

@ -30,7 +30,6 @@
* SOFTWARE.
*/
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
@ -95,21 +94,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
union ib_gid sgid;
u8 mac[6];
int err;
int is_mcast;
int is_mcast = 0;
struct in6_addr in6;
u16 vlan_tag;
err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
return ERR_PTR(err);
memcpy(ah->av.eth.mac, mac, 6);
err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid);
if (err)
return ERR_PTR(err);
vlan_tag = rdma_get_vlan_id(&sgid);
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6)) {
is_mcast = 1;
resolve_mcast_mac(&in6, ah->av.eth.mac);
} else {
memcpy(ah->av.eth.mac, ah_attr->dmac, 6);
}
vlan_tag = ah_attr->vlan_id;
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));

View File

@ -57,6 +57,7 @@ struct mlx4_alias_guid_work_context {
int query_id;
struct list_head list;
int block_num;
u8 method;
};
struct mlx4_next_alias_guid_work {
@ -80,7 +81,8 @@ void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num,
(unsigned long long)guid_indexes);
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* The location of the specific index starts from bit number 4
@ -144,7 +146,8 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, (long long)guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num,
(unsigned long long)guid_indexes);
/*calculate the slaves and notify them*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@ -201,7 +204,7 @@ static void aliasguid_query_handler(int status,
{
struct mlx4_ib_dev *dev;
struct mlx4_alias_guid_work_context *cb_ctx = context;
u8 port_index ;
u8 port_index;
int i;
struct mlx4_sriov_alias_guid_info_rec_det *rec;
unsigned long flags, flags1;
@ -240,6 +243,18 @@ static void aliasguid_query_handler(int status,
for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
__be64 tmp_cur_ag;
tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
if ((cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE)
&& (MLX4_NOT_SET_GUID == tmp_cur_ag)) {
pr_debug("%s:Record num %d in block_num:%d "
"was deleted by SM,ownership by %d "
"(0 = driver, 1=sysAdmin, 2=None)\n",
__func__, i, guid_rec->block_num,
rec->ownership);
rec->guid_indexes = rec->guid_indexes &
~mlx4_ib_get_aguid_comp_mask_from_ix(i);
continue;
}
/* check if the SM didn't assign one of the records.
* if it didn't, if it was not sysadmin request:
* ask the SM to give a new GUID, (instead of the driver request).
@ -379,7 +394,7 @@ static int set_guid_rec(struct ib_device *ibdev,
callback_context->port = port;
callback_context->dev = dev;
callback_context->block_num = index;
callback_context->method = rec_det->method;
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
guid_info_rec.lid = cpu_to_be16(attr.lid);

View File

@ -33,6 +33,7 @@
#include <rdma/ib_mad.h>
#include <linux/mlx4/cmd.h>
#include <linux/rbtree.h>
#include <linux/idr.h>
#include <rdma/ib_cm.h>
@ -60,6 +61,11 @@ struct cm_generic_msg {
__be32 remote_comm_id;
};
struct cm_sidr_generic_msg {
struct ib_mad_hdr hdr;
__be32 request_id;
};
struct cm_req_msg {
unsigned char unused[0x60];
union ib_gid primary_path_sgid;
@ -68,28 +74,62 @@ struct cm_req_msg {
/*
 * Write cm_id, converted with cpu_to_be32(), into the local communication
 * ID of a CM MAD.
 *
 * For a SIDR_REQ the value goes into the request_id field of the SIDR
 * layout; a SIDR_REP is rejected with an error message and left untouched;
 * every other attribute uses the local_comm_id field of the generic layout.
 */
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
	struct cm_sidr_generic_msg *sidr_msg;
	struct cm_generic_msg *cm_msg;

	if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		sidr_msg = (struct cm_sidr_generic_msg *)mad;
		sidr_msg->request_id = cpu_to_be32(cm_id);
		return;
	}

	if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		pr_err("trying to set local_comm_id in SIDR_REP\n");
		return;
	}

	cm_msg = (struct cm_generic_msg *)mad;
	cm_msg->local_comm_id = cpu_to_be32(cm_id);
}
/*
 * Read the local communication ID of a CM MAD in CPU byte order.
 *
 * For a SIDR_REQ the value comes from the request_id field of the SIDR
 * layout; every attribute other than SIDR_REP uses the local_comm_id field
 * of the generic layout.  A SIDR_REP is rejected: an error is logged and
 * -1 (i.e. 0xffffffff as u32) is returned.
 */
static u32 get_local_comm_id(struct ib_mad *mad)
{
	if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		struct cm_sidr_generic_msg *msg =
			(struct cm_sidr_generic_msg *)mad;

		return be32_to_cpu(msg->request_id);
	} else if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		/* This is the getter: report a failed read, not a failed set. */
		pr_err("trying to get local_comm_id in SIDR_REP\n");
		return -1;
	} else {
		struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;

		return be32_to_cpu(msg->local_comm_id);
	}
}
/*
 * Write cm_id, converted with cpu_to_be32(), into the remote communication
 * ID of a CM MAD.
 *
 * For a SIDR_REP the value goes into the request_id field of the SIDR
 * layout; a SIDR_REQ is rejected with an error message and left untouched;
 * every other attribute uses the remote_comm_id field of the generic layout.
 */
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
	struct cm_sidr_generic_msg *sidr_msg;
	struct cm_generic_msg *cm_msg;

	if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		sidr_msg = (struct cm_sidr_generic_msg *)mad;
		sidr_msg->request_id = cpu_to_be32(cm_id);
		return;
	}

	if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		pr_err("trying to set remote_comm_id in SIDR_REQ\n");
		return;
	}

	cm_msg = (struct cm_generic_msg *)mad;
	cm_msg->remote_comm_id = cpu_to_be32(cm_id);
}
/*
 * Read the remote communication ID of a CM MAD in CPU byte order.
 *
 * For a SIDR_REP the value comes from the request_id field of the SIDR
 * layout; every attribute other than SIDR_REQ uses the remote_comm_id field
 * of the generic layout.  A SIDR_REQ is rejected: an error is logged and
 * -1 (i.e. 0xffffffff as u32) is returned.
 */
static u32 get_remote_comm_id(struct ib_mad *mad)
{
	if (mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
		struct cm_sidr_generic_msg *msg =
			(struct cm_sidr_generic_msg *)mad;

		return be32_to_cpu(msg->request_id);
	} else if (mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
		/* This is the getter: report a failed read, not a failed set. */
		pr_err("trying to get remote_comm_id in SIDR_REQ\n");
		return -1;
	} else {
		struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;

		return be32_to_cpu(msg->remote_comm_id);
	}
}
static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
@ -285,19 +325,22 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
u32 sl_cm_id;
int pv_cm_id = -1;
sl_cm_id = get_local_comm_id(mad);
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
sl_cm_id = get_local_comm_id(mad);
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
__func__, slave_id, sl_cm_id);
return PTR_ERR(id);
}
} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
return 0;
} else {
sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
}
@ -323,7 +366,8 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
u32 pv_cm_id;
struct id_map_entry *id;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
union ib_gid gid;
if (is_eth)
@ -333,7 +377,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) {
mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
(long long)gid.global.interface_id);
(unsigned long long)gid.global.interface_id);
return -ENOENT;
}
return 0;

View File

@ -33,6 +33,7 @@
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include <linux/slab.h>
#include "mlx4_ib.h"
@ -92,12 +93,33 @@ static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
return get_sw_cqe(cq, cq->mcq.cons_index);
}
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
int mlx4_ib_modify_cq(struct ib_cq *cq,
struct ib_cq_attr *cq_attr,
int cq_attr_mask)
{
int err = 0;
struct mlx4_ib_cq *mcq = to_mcq(cq);
struct mlx4_ib_dev *dev = to_mdev(cq->device);
return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period);
if (cq_attr_mask & IB_CQ_CAP_FLAGS) {
if (cq_attr->cq_cap_flags & IB_CQ_TIMESTAMP)
return -ENOTSUPP;
if (cq_attr->cq_cap_flags & IB_CQ_IGNORE_OVERRUN) {
if (dev->dev->caps.cq_flags & MLX4_DEV_CAP_CQ_FLAG_IO)
err = mlx4_cq_ignore_overrun(dev->dev, &mcq->mcq);
else
err = -ENOSYS;
}
}
if (!err)
if (cq_attr_mask & IB_CQ_MODERATION)
err = mlx4_cq_modify(dev->dev, &mcq->mcq,
cq_attr->moderation.cq_count,
cq_attr->moderation.cq_period);
return err;
}
static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent)
@ -173,7 +195,11 @@ err_buf:
return err;
}
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
/* we don't support system timestamping */
#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_TIMESTAMP
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@ -181,11 +207,16 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
struct mlx4_ib_cq *cq;
struct mlx4_uar *uar;
int err;
int entries = attr->cqe;
int vector = attr->comp_vector;
if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
cq = kmalloc(sizeof *cq, GFP_KERNEL);
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
return ERR_PTR(-EINVAL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@ -195,6 +226,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
cq->create_flags = attr->flags;
if (context) {
struct mlx4_ib_create_cq ucmd;
@ -236,7 +268,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
vector = dev->eq_table[vector % ibdev->num_comp_vectors];
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
cq->db.dma, &cq->mcq, vector, 0, 0);
cq->db.dma, &cq->mcq, vector, 0,
!!(cq->create_flags & IB_CQ_TIMESTAMP));
if (err)
goto err_dbmap;
@ -331,21 +364,23 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
u32 i;
i = cq->mcq.cons_index;
while (get_sw_cqe(cq, i & cq->ibcq.cqe))
while (get_sw_cqe(cq, i))
++i;
return i - cq->mcq.cons_index;
}
static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
static int mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
int cqe_size = cq->buf.entry_size;
int cqe_inc = cqe_size == 64 ? 1 : 0;
struct mlx4_cqe *start_cqe;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
start_cqe = cqe;
cqe += cqe_inc;
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
@ -357,9 +392,15 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
if (cqe == start_cqe) {
pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", cq->mcq.cqn);
return -ENOMEM;
}
cqe += cqe_inc;
}
++cq->mcq.cons_index;
return 0;
}
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
@ -374,7 +415,6 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
return -ENOSYS;
mutex_lock(&cq->resize_mutex);
if (entries < 1 || entries > dev->dev->caps.max_cqes) {
err = -EINVAL;
goto out;
@ -386,6 +426,11 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
goto out;
}
if (entries > dev->dev->caps.max_cqes + 1) {
err = -EINVAL;
goto out;
}
if (ibcq->uobject) {
err = mlx4_alloc_resize_umem(dev, cq, entries, udata);
if (err)
@ -425,7 +470,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
mlx4_ib_cq_resize_copy_cqes(cq);
err = mlx4_ib_cq_resize_copy_cqes(cq);
tmp_buf = cq->buf;
tmp_cqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
@ -580,7 +625,7 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
unsigned tail, struct mlx4_cqe *cqe)
unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
@ -590,12 +635,19 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
DMA_FROM_DEVICE);
hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
wc->dlid_path_bits = 0;
if (is_eth) {
wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
} else {
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
}
return 0;
}
@ -607,11 +659,14 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
struct mlx4_ib_srq *srq;
struct mlx4_srq *msrq = NULL;
int is_send;
int is_error;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
unsigned tail = 0;
int timestamp_en = !!(cq->create_flags & IB_CQ_TIMESTAMP);
repoll:
cqe = next_cqe_sw(cq);
@ -675,6 +730,20 @@ repoll:
wc->qp = &(*cur_qp)->ibqp;
if (wc->qp->qp_type == IB_QPT_XRC_TGT) {
u32 srq_num;
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
srq_num = g_mlpath_rqpn & 0xffffff;
/* SRQ is also in the radix tree */
msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
srq_num);
if (unlikely(!msrq)) {
pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
cq->mcq.cqn, srq_num);
return -EINVAL;
}
}
if (is_send) {
wq = &(*cur_qp)->sq;
if (!(*cur_qp)->sq_signal_bits) {
@ -688,6 +757,11 @@ repoll:
wqe_ctr = be16_to_cpu(cqe->wqe_index);
wc->wr_id = srq->wrid[wqe_ctr];
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else if (msrq) {
srq = to_mibsrq(msrq);
wqe_ctr = be16_to_cpu(cqe->wqe_index);
wc->wr_id = srq->wrid[wqe_ctr];
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
tail = wq->tail & (wq->wqe_cnt - 1);
@ -707,6 +781,7 @@ repoll:
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
/* fall through */
case MLX4_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
@ -778,10 +853,31 @@ repoll:
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
return use_tunnel_data
(*cur_qp, cq, wc, tail, cqe,
rdma_port_get_link_layer
(wc->qp->device,
(*cur_qp)->port) ==
IB_LINK_LAYER_ETHERNET);
}
if (timestamp_en) {
/* currently, only CQ_CREATE_WITH_TIMESTAMPING_RAW is
* supported. CQ_CREATE_WITH_TIMESTAMPING_SYS isn't
* supported */
if (cq->create_flags & IB_CQ_TIMESTAMP_TO_SYS_TIME) {
wc->ts.timestamp = 0;
} else {
wc->ts.timestamp =
((u64)(be32_to_cpu(cqe->timestamp_16_47)
+ !cqe->timestamp_0_15) << 16)
| be16_to_cpu(cqe->timestamp_0_15);
wc->wc_flags |= IB_WC_WITH_TIMESTAMP;
}
} else {
wc->wc_flags |= IB_WC_WITH_SLID;
wc->slid = be16_to_cpu(cqe->rlid);
}
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
@ -789,11 +885,27 @@ repoll:
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (!timestamp_en) {
if (rdma_port_get_link_layer(wc->qp->device,
(*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
(*cur_qp)->port) ==
IB_LINK_LAYER_ETHERNET)
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
else
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
wc->wc_flags |= IB_WC_WITH_SL;
}
if ((be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_VLAN_PRESENT_MASK) && !timestamp_en) {
wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
MLX4_CQE_VID_MASK;
wc->wc_flags |= IB_WC_WITH_VLAN;
} else {
wc->vlan_id = 0xffff;
}
if (!timestamp_en) {
memcpy(wc->smac, cqe->smac, 6);
wc->wc_flags |= IB_WC_WITH_SMAC;
}
}
return 0;

View File

@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
struct ib_umem_chunk *chunk;
int err = 0;
mutex_lock(&context->db_page_mutex);
@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;

View File

@ -545,11 +545,32 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* adjust tunnel data */
tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
if (is_eth) {
u16 vlan = 0;
if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
NULL)) {
if (vlan != wc->vlan_id)
/* VST: the default vlan is not the packet vlan, so drop the
* packet */
goto out;
else
/* VST: remove (hide) the vlan from the VF */
vlan = 0;
} else {
vlan = wc->vlan_id;
}
tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
} else {
tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
}
ib_dma_sync_single_for_device(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
sizeof (struct mlx4_rcv_tunnel_mad),
@ -696,12 +717,11 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
be16_to_cpu(in_mad->mad_hdr.attr_id));
if (in_wc->wc_flags & IB_WC_GRH) {
pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
(long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix),
(long long)
be64_to_cpu(in_grh->sgid.global.interface_id));
(unsigned long long)be64_to_cpu(in_grh->sgid.global.subnet_prefix),
(unsigned long long)be64_to_cpu(in_grh->sgid.global.interface_id));
pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
(long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix),
(long long)be64_to_cpu(in_grh->dgid.global.interface_id));
(unsigned long long)be64_to_cpu(in_grh->dgid.global.subnet_prefix),
(unsigned long long)be64_to_cpu(in_grh->dgid.global.interface_id));
}
}
@ -946,7 +966,7 @@ int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
MLX4_CMD_WRAPPED);
MLX4_CMD_NATIVE);
if (!err)
memcpy(counter, mailbox->buf, MLX4_IF_STAT_SZ(1));
@ -961,7 +981,7 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int err;
u32 counter_index = dev->counters[port_num - 1] & 0xffff;
u32 counter_index = dev->counters[port_num - 1].counter_index & 0xffff;
u8 mode;
char counter_buf[MLX4_IF_STAT_SZ(1)];
union mlx4_counter *counter = (union mlx4_counter *)
@ -970,10 +990,16 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
return -EINVAL;
if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0)) {
err = IB_MAD_RESULT_FAILURE;
} else {
/* in case of default counter IB shares the counter with ETH */
/* the state could be -EEXIST or -ENOSPC */
if (dev->counters[port_num - 1].status) {
memset(out_mad->data, 0, sizeof out_mad->data);
err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
} else {
if (mlx4_ib_query_if_stat(dev, counter_index, counter, 0))
return IB_MAD_RESULT_FAILURE;
memset(out_mad->data, 0, sizeof(out_mad->data));
mode = counter->control.cnt_mode & 0xFF;
err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
switch (mode & 0xf) {
@ -992,7 +1018,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
}
}
return err;
}
@ -1179,6 +1204,11 @@ void handle_port_mgmt_change_event(struct work_struct *work)
u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
update_sm_ah(dev, port, lid, sl);
mlx4_ib_dispatch_event(dev, port, IB_EVENT_SM_CHANGE);
if (mlx4_is_master(dev->dev))
mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
changed_attr & MSTR_SM_CHANGE_MASK,
lid, sl);
}
/* Check if it is a lid change event */
@ -1295,8 +1325,9 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
enum ib_qp_type dest_qpt, u16 pkey_index,
u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
u8 *s_mac, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_send_wr wr, *bad_wr;
@ -1385,6 +1416,9 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
if (s_mac)
memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
ret = ib_post_send(send_qp, &wr, &bad_wr);
out:
@ -1512,6 +1546,11 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
if (ah_attr.ah_flags & IB_AH_GRH)
if (get_real_sgid_index(dev, slave, ctx->port, &ah_attr))
return;
memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
ah_attr.vlan_id = tunnel->hdr.vlan;
/* if the slave has a default vlan, use it */
mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
&ah_attr.vlan_id, &ah_attr.sl);
mlx4_ib_send_to_wire(dev, slave, ctx->port,
is_proxy_qp0(dev, wc->src_qp, slave) ?
@ -1519,7 +1558,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
be16_to_cpu(tunnel->hdr.pkey_index),
be32_to_cpu(tunnel->hdr.remote_qpn),
be32_to_cpu(tunnel->hdr.qkey),
&ah_attr, &tunnel->mad);
&ah_attr, wc->smac, &tunnel->mad);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@ -1564,6 +1603,12 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->ring[i].addr,
rx_buf_size,
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(ctx->ib_dev,
tun_qp->ring[i].map))) {
mlx4_ib_warn(ctx->ib_dev, "ib_dma_map_single failed\n");
kfree(tun_qp->ring[i].addr);
goto err;
}
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@ -1576,6 +1621,12 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->tx_ring[i].buf.addr,
tx_buf_size,
DMA_TO_DEVICE);
if (unlikely(ib_dma_mapping_error(ctx->ib_dev,
tun_qp->tx_ring[i].buf.map))) {
mlx4_ib_warn(ctx->ib_dev, "ib_dma_map_single failed\n");
kfree(tun_qp->tx_ring[i].buf.addr);
goto tx_err;
}
tun_qp->tx_ring[i].ah = NULL;
}
spin_lock_init(&tun_qp->tx_lock);
@ -1664,12 +1715,12 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
(MLX4_NUM_TUNNEL_BUFS - 1));
if (ret)
pr_err("Failed reposting tunnel "
"buf:%lld\n", (long long)wc.wr_id);
"buf:%lld\n", (unsigned long long)wc.wr_id);
break;
case IB_WC_SEND:
pr_debug("received tunnel send completion:"
"wrid=0x%llx, status=0x%x\n",
(long long)wc.wr_id, wc.status);
(unsigned long long)wc.wr_id, wc.status);
ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
@ -1685,7 +1736,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
} else {
pr_debug("mlx4_ib: completion error in tunnel: %d."
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, (long long)wc.wr_id);
ctx->slave, wc.status, (unsigned long long)wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);
@ -1757,6 +1808,11 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
memset(&attr, 0, sizeof attr);
attr.qp_state = IB_QPS_INIT;
ret = 0;
if (create_tun)
ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
ctx->port, 0xFFFF, &attr.pkey_index);
if (ret || !create_tun)
attr.pkey_index =
to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
attr.qkey = IB_QP1_QKEY;
@ -1837,7 +1893,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)))
pr_err("Failed reposting SQP "
"buf:%lld\n", (long long)wc.wr_id);
"buf:%lld\n", (unsigned long long)wc.wr_id);
break;
default:
BUG_ON(1);
@ -1846,7 +1902,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
} else {
pr_debug("mlx4_ib: completion error in tunnel: %d."
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, (long long)wc.wr_id);
ctx->slave, wc.status, (unsigned long long)wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
ib_destroy_ah(sqp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah);

File diff suppressed because it is too large Load Diff

View File

@ -36,6 +36,7 @@
#include <rdma/ib_sa.h>
#include <linux/mlx4/cmd.h>
#include <linux/rbtree.h>
#include <linux/delay.h>
#include "mlx4_ib.h"
@ -53,6 +54,7 @@
#define mcg_error_group(group, format, arg...) \
pr_err(" %16s: " format, (group)->name, ## arg)
static union ib_gid mgid0;
static struct workqueue_struct *clean_wq;
@ -214,7 +216,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
spin_unlock(&dev->sm_lock);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, 0, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
@ -567,7 +569,7 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
group->state = MCAST_IDLE;
atomic_inc(&group->refcount);
queue_work(group->demux->mcg_wq, &group->work);
if (!queue_work(group->demux->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
mutex_unlock(&group->lock);
@ -656,8 +658,9 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
method = group->response_sa_mad.mad_hdr.method;
if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
(long long unsigned int)be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
(long long unsigned int)be64_to_cpu(group->last_req_tid));
(long long)be64_to_cpu(
group->response_sa_mad.mad_hdr.tid),
(long long)be64_to_cpu(group->last_req_tid));
group->state = group->prev_state;
goto process_requests;
}
@ -665,7 +668,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
if (!list_empty(&group->pending_list))
req = list_first_entry(&group->pending_list,
struct mcast_req, group_list);
if (method == IB_MGMT_METHOD_GET_RESP) {
if ((method == IB_MGMT_METHOD_GET_RESP)) {
if (req) {
send_reply_to_slave(req->func, group, &req->sa_mad, status);
--group->func[req->func].num_pend_reqs;
@ -752,8 +755,8 @@ static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx
if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
group->rec.mgid = *new_mgid;
sprintf(group->name, "%016llx%016llx",
(long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
(long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
(long long)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
(long long)be64_to_cpu(group->rec.mgid.global.interface_id));
list_del_init(&group->mgid0_list);
cur_group = mcast_insert(ctx, group);
if (cur_group) {
@ -834,8 +837,10 @@ static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
mutex_init(&group->lock);
sprintf(group->name, "%016llx%016llx",
(long long unsigned int)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
(long long unsigned int)be64_to_cpu(group->rec.mgid.global.interface_id));
(long long)be64_to_cpu(
group->rec.mgid.global.subnet_prefix),
(long long)be64_to_cpu(
group->rec.mgid.global.interface_id));
sysfs_attr_init(&group->dentry.attr);
group->dentry.show = sysfs_show_group;
group->dentry.store = NULL;
@ -871,7 +876,7 @@ static void queue_req(struct mcast_req *req)
list_add_tail(&req->group_list, &group->pending_list);
list_add_tail(&req->func_list, &group->func[req->func].pending);
/* calls mlx4_ib_mcg_work_handler */
queue_work(group->demux->mcg_wq, &group->work);
if (!queue_work(group->demux->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
}
@ -907,7 +912,7 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
group->state = MCAST_RESP_READY;
/* calls mlx4_ib_mcg_work_handler */
atomic_inc(&group->refcount);
queue_work(ctx->mcg_wq, &group->work);
if (!queue_work(ctx->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
mutex_unlock(&group->lock);
release_group(group, 0);
@ -998,13 +1003,14 @@ static ssize_t sysfs_show_group(struct device *dev,
else
sprintf(state_str, "%s(TID=0x%llx)",
get_state_string(group->state),
(long long unsigned int)be64_to_cpu(group->last_req_tid));
(long long)be64_to_cpu(group->last_req_tid));
if (list_empty(&group->pending_list)) {
sprintf(pending_str, "No");
} else {
req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
sprintf(pending_str, "Yes(TID=0x%llx)",
(long long unsigned int)be64_to_cpu(req->sa_mad.mad_hdr.tid));
(long long)be64_to_cpu(
req->sa_mad.mad_hdr.tid));
}
len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
group->rec.scope_join_state & 0xf,

View File

@ -0,0 +1,116 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mlx4_ib.h"
#include "mlx4_exp.h"
#include <linux/mlx4/qp.h>
/*
 * Fill the experimental device attributes for an mlx4 device.
 *
 * The base attributes are obtained from mlx4_ib_query_device(); the
 * experimental fields are then derived from them:
 *  - inline_recv_sz is max_rq_sg scatter entries worth of data segments,
 *  - the QPG/UD_RSS capabilities are reported through device_cap_flags2
 *    and removed (together with UD_TSS) from the base device_cap_flags,
 *  - max_rss_tbl_sz is copied from the base attributes when non-zero.
 * exp_comp_mask records which experimental fields are valid.
 *
 * Returns 0 on success or the error from mlx4_ib_query_device().  On
 * failure the experimental fields are left untouched, because the base
 * attributes they are derived from cannot be trusted.
 */
int mlx4_ib_exp_query_device(struct ib_device *ibdev,
			     struct ib_exp_device_attr *props)
{
	struct ib_device_attr *base = &props->base;
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	int ret;

	ret = mlx4_ib_query_device(ibdev, base);
	if (ret)
		return ret;	/* do not derive anything from an unfilled base */

	props->exp_comp_mask = IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
	props->inline_recv_sz = dev->dev->caps.max_rq_sg * sizeof(struct mlx4_wqe_data_seg);

	props->device_cap_flags2 = 0;

	/* move RSS device cap from device_cap to device_cap_flags2 */
	if (base->device_cap_flags & IB_DEVICE_QPG) {
		props->device_cap_flags2 |= IB_EXP_DEVICE_QPG;
		if (base->device_cap_flags & IB_DEVICE_UD_RSS)
			props->device_cap_flags2 |= IB_EXP_DEVICE_UD_RSS;
	}
	base->device_cap_flags &= ~(IB_DEVICE_QPG |
				    IB_DEVICE_UD_RSS |
				    IB_DEVICE_UD_TSS);

	if (base->max_rss_tbl_sz > 0) {
		props->max_rss_tbl_sz = base->max_rss_tbl_sz;
		props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
	} else {
		props->max_rss_tbl_sz = 0;
		props->exp_comp_mask &= ~IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
	}

	/* only advertise flags2 when at least one capability was moved there */
	if (props->device_cap_flags2)
		props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2;

	return 0;
}
/*
* Experimental functions
*/
struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd,
struct ib_exp_qp_init_attr *init_attr,
struct ib_udata *udata)
{
int rwqe_size;
struct ib_qp *qp;
struct mlx4_ib_qp *mqp;
int use_inlr;
struct mlx4_ib_dev *dev;
if (init_attr->max_inl_recv && !udata)
return ERR_PTR(-EINVAL);
use_inlr = mlx4_ib_qp_has_rq((struct ib_qp_init_attr *)init_attr) &&
init_attr->max_inl_recv && pd;
if (use_inlr) {
rwqe_size = roundup_pow_of_two(max(1U, init_attr->cap.max_recv_sge)) *
sizeof(struct mlx4_wqe_data_seg);
if (rwqe_size < init_attr->max_inl_recv) {
dev = to_mdev(pd->device);
init_attr->max_inl_recv = min(init_attr->max_inl_recv,
(u32)(dev->dev->caps.max_rq_sg *
sizeof(struct mlx4_wqe_data_seg)));
init_attr->cap.max_recv_sge = roundup_pow_of_two(init_attr->max_inl_recv) /
sizeof(struct mlx4_wqe_data_seg);
}
} else {
init_attr->max_inl_recv = 0;
}
qp = mlx4_ib_create_qp(pd, (struct ib_qp_init_attr *)init_attr, udata);
if (IS_ERR(qp))
return qp;
if (use_inlr) {
mqp = to_mqp(qp);
mqp->max_inlr_data = 1 << mqp->rq.wqe_shift;
init_attr->max_inl_recv = mqp->max_inlr_data;
}
return qp;
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Declarations of the experimental ("exp") verbs entry points of mlx4_ib. */
#ifndef MLX4_EXP_H
#define MLX4_EXP_H
#include <rdma/ib_verbs_exp.h>
#include "mlx4_ib.h"
/*
 * Create a QP from experimental init attributes; returns the new QP or an
 * ERR_PTR() value.
 */
struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd,
struct ib_exp_qp_init_attr *init_attr,
struct ib_udata *udata);
/*
 * Query the device and fill both the base and the experimental attributes
 * in *props; returns an int status code.
 */
int mlx4_ib_exp_query_device(struct ib_device *ibdev,
struct ib_exp_device_attr *props);
#endif /* MLX4_EXP_H */

View File

@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <linux/rbtree.h>
#include <linux/notifier.h>
#include <rdma/ib_verbs.h>
@ -47,7 +48,6 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
#include <linux/rbtree.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
@ -72,9 +72,7 @@ enum {
/*module param to indicate if SM assigns the alias_GUID*/
extern int mlx4_ib_sm_guid_assign;
#ifdef __linux__
extern struct proc_dir_entry *mlx4_mrs_dir_entry;
#endif
#define MLX4_IB_UC_STEER_QPN_ALIGN 1
#define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024)
@ -128,6 +126,7 @@ struct mlx4_ib_cq {
struct mutex resize_mutex;
struct ib_umem *umem;
struct ib_umem *resize_umem;
int create_flags;
};
struct mlx4_ib_mr {
@ -135,6 +134,13 @@ struct mlx4_ib_mr {
struct mlx4_mr mmr;
struct ib_umem *umem;
struct mlx4_shared_mr_info *smr_info;
atomic_t invalidated;
struct completion invalidation_comp;
};
/*
 * mlx4 memory window: pairs the core ib_mw handle with the low-level
 * mlx4_mw object.  Use to_mmw() to get from the ib_mw back to this struct.
 */
struct mlx4_ib_mw {
/* core verbs handle handed out to callers; embedded for container_of() */
struct ib_mw ibmw;
/* low-level memory window passed to the mlx4_mw_*() helpers */
struct mlx4_mw mmw;
};
struct mlx4_ib_fast_reg_page_list {
@ -148,6 +154,12 @@ struct mlx4_ib_fmr {
struct mlx4_fmr mfmr;
};
/*
 * mlx4 flow rule: wraps the core ib_flow handle together with the
 * registration IDs recorded for it.  Use to_mflow() to get from the
 * ib_flow back to this struct.
 */
struct mlx4_ib_flow {
/* core verbs handle; embedded for container_of() */
struct ib_flow ibflow;
/* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
u64 reg_id[2];
};
struct mlx4_ib_wq {
u64 *wrid;
spinlock_t lock;
@ -163,6 +175,9 @@ struct mlx4_ib_wq {
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_CAP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
MLX4_IB_QP_CAP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
MLX4_IB_QP_CAP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
@ -179,6 +194,7 @@ enum mlx4_ib_mmap_cmd {
MLX4_IB_MMAP_UAR_PAGE = 0,
MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1,
MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2,
MLX4_IB_MMAP_GET_HW_CLOCK = 3,
};
enum mlx4_ib_qp_type {
@ -319,8 +335,14 @@ struct mlx4_ib_qp {
struct mlx4_roce_smac_vlan_info pri;
struct mlx4_roce_smac_vlan_info alt;
struct list_head rules_list;
u64 reg_id;
int max_inline_data;
struct mlx4_bf bf;
/*
* Experimental data
*/
int max_inlr_data;
};
struct mlx4_ib_srq {
@ -354,6 +376,12 @@ struct mlx4_ib_ah {
#define MLX4_NOT_SET_GUID (0x00LL)
#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
/****************************************/
/* ioctl codes */
/****************************************/
#define MLX4_IOC_MAGIC 'm'
#define MLX4_IOCHWCLOCKOFFSET _IOR(MLX4_IOC_MAGIC, 1, int)
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
@ -478,7 +506,9 @@ struct mlx4_ib_sriov {
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
struct net_device *masters[MLX4_MAX_PORTS];
struct notifier_block nb;
struct notifier_block nb_inet;
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
@ -518,6 +548,11 @@ struct mlx4_ib_iov_port {
struct mlx4_ib_iov_sysfs_attr mcg_dentry;
};
/*
 * Per-port counter bookkeeping; struct mlx4_ib_dev holds one entry per
 * port in its counters[] array.
 */
struct mlx4_ib_counter {
/* NOTE(review): presumably the index of the counter assigned to the port - confirm */
int counter_index;
/*
 * Non-zero makes the PMA MAD path reply with zeroed data instead of
 * decoding the counter; per the comment there it may hold -EEXIST or -ENOSPC.
 */
int status;
};
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@ -534,7 +569,7 @@ struct mlx4_ib_dev {
struct mutex cap_mask_mutex;
bool ib_active;
struct mlx4_ib_iboe iboe;
int counters[MLX4_MAX_PORTS];
struct mlx4_ib_counter counters[MLX4_MAX_PORTS];
int *eq_table;
int eq_added;
struct kobject *iov_parent;
@ -595,6 +630,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
return container_of(ibmr, struct mlx4_ib_mr, ibmr);
}
/* Map a core ib_mw handle back to the mlx4_ib_mw that embeds it. */
static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
{
	struct mlx4_ib_mw *mw = container_of(ibmw, struct mlx4_ib_mw, ibmw);

	return mw;
}
static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
{
return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
@ -604,6 +644,12 @@ static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
}
/* Map a core ib_flow handle back to the mlx4_ib_flow that embeds it. */
static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
{
	struct mlx4_ib_flow *mflow =
		container_of(ibflow, struct mlx4_ib_flow, ibflow);

	return mflow;
}
static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct mlx4_ib_qp, ibqp);
@ -646,16 +692,23 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata, int mr_id);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
int page_list_len);
void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_modify_cq(struct ib_cq *cq,
struct ib_cq_attr *cq_attr,
int cq_attr_mask);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
int mlx4_ib_destroy_cq(struct ib_cq *cq);
@ -730,6 +783,13 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
return !!(ah->av.ib.g_slid & 0x80);
}
/*
 * Tell whether a QP created with @attr owns a receive queue: XRC
 * initiator/target QPs never do, and any other QP does unless it is
 * attached to an SRQ.  Returns 1 if it has an RQ, 0 otherwise.
 */
static inline int mlx4_ib_qp_has_rq(struct ib_qp_init_attr *attr)
{
	switch (attr->qp_type) {
	case IB_QPT_XRC_INI:
	case IB_QPT_XRC_TGT:
		return 0;
	default:
		return !attr->srq;
	}
}
int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
@ -757,7 +817,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
struct ib_grh *grh, struct ib_mad *mad);
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
@ -799,5 +859,7 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props);
#endif /* MLX4_IB_H */

View File

@ -35,11 +35,6 @@
#include <linux/module.h>
#include <linux/sched.h>
#ifdef __linux__
#include <linux/proc_fs.h>
#include <linux/cred.h>
#endif
#include "mlx4_ib.h"
static u32 convert_access(int acc)
@ -48,9 +43,11 @@ static u32 convert_access(int acc)
(acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
(acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
MLX4_PERM_LOCAL_READ;
}
#ifdef __linux__
/* No support for Shared MR feature */
#if 0
static ssize_t shared_mr_proc_read(struct file *file,
char __user *buffer,
size_t len,
@ -129,7 +126,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
return &mr->ibmr;
err_mr:
mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_free:
kfree(mr);
@ -159,7 +156,7 @@ static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
if (len & (mtt_size-1ULL)) {
WARN(1 ,
"write_block: len %llx is not aligned to mtt_size %llx\n",
(long long)len, (long long)mtt_size);
(unsigned long long)len, (unsigned long long)mtt_size);
return -EINVAL;
}
@ -203,8 +200,6 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
struct ib_umem_chunk *chunk;
int j;
u64 len = 0;
int err = 0;
u64 mtt_size;
@ -212,6 +207,8 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
u64 mtt_shift;
int start_index = 0;
int npages = 0;
struct scatterlist *sg;
int i;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
@ -220,12 +217,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
mtt_shift = mtt->page_shift;
mtt_size = 1ULL << mtt_shift;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
if (cur_start_addr + len ==
sg_dma_address(&chunk->page_list[j])) {
sg_dma_address(sg)) {
/* still the same block */
len += sg_dma_len(&chunk->page_list[j]);
len += sg_dma_len(sg);
continue;
}
/* A new block is started ...*/
@ -242,8 +238,8 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
goto out;
cur_start_addr =
sg_dma_address(&chunk->page_list[j]);
len = sg_dma_len(&chunk->page_list[j]);
sg_dma_address(sg);
len = sg_dma_len(sg);
}
/* Handle the last block */
@ -319,8 +315,6 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 start_va,
int *num_of_mtts)
{
struct ib_umem_chunk *chunk;
int j;
u64 block_shift = MLX4_MAX_MTT_SHIFT;
u64 current_block_len = 0;
u64 current_block_start = 0;
@ -330,14 +324,18 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 total_len = 0;
u64 last_block_aligned_end = 0;
u64 min_shift = ilog2(umem->page_size);
struct scatterlist *sg;
int i;
u64 next_block_start;
u64 current_block_end;
list_for_each_entry(chunk, &umem->chunk_list, list) {
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
/* Initialization - save the first chunk start as
the current_block_start - block means contiguous pages.
*/
if (current_block_len == 0 && current_block_start == 0) {
first_block_start = current_block_start =
sg_dma_address(&chunk->page_list[0]);
sg_dma_address(sg);
/* Find the bits that are different between
the physical address and the virtual
address for the start of the MR.
@ -361,13 +359,12 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
, block_shift);
}
/* Go over the scatter entries in the current chunk, check
/* Go over the scatter entries and check
if they continue the previous scatter entry.
*/
for (j = 0; j < chunk->nmap; ++j) {
u64 next_block_start =
sg_dma_address(&chunk->page_list[j]);
u64 current_block_end = current_block_start
next_block_start =
sg_dma_address(sg);
current_block_end = current_block_start
+ current_block_len;
/* If we have a split (non-contig.) between two block*/
if (current_block_end != next_block_start) {
@ -392,7 +389,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
/* Start a new block */
current_block_start = next_block_start;
current_block_len =
sg_dma_len(&chunk->page_list[j]);
sg_dma_len(sg);
continue;
}
/* The scatter entry is another part of
@ -402,8 +399,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
which merge some blocks together.
*/
current_block_len +=
sg_dma_len(&chunk->page_list[j]);
}
sg_dma_len(sg);
}
/* Account for the last block in the total len */
@ -416,7 +412,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
WARN((total_len & ((1ULL<<block_shift)-1ULL)),
" misaligned total length detected (%llu, %llu)!",
(long long)total_len, (long long)block_shift);
(unsigned long long)total_len, (unsigned long long)block_shift);
*num_of_mtts = total_len >> block_shift;
end:
@ -426,16 +422,19 @@ end:
*/
WARN(1,
"mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
(long long)block_shift);
(unsigned long long)block_shift);
block_shift = min_shift;
}
return block_shift;
}
#ifdef __linux__
/* No support for Shared MR */
#if 0
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
struct proc_dir_entry *mr_proc_entry;
mode_t mode = S_IFREG;
char name_buff[16];
@ -475,8 +474,51 @@ static int is_shared_mr(int access_flags)
IB_ACCESS_SHARED_MR_OTHER_WRITE));
}
/*
 * Tear down the shared-MR bookkeeping of @mr: remove its proc entry
 * (named after the MR id in upper-case hex) and free smr_info.
 */
static void free_smr_info(struct mlx4_ib_mr *mr)
{
	char proc_name[16];

	/*
	 * Once the master/parent shared MR is deregistered it can no longer
	 * be shared: its mr_id is returned to the kernel as part of
	 * ib_uverbs_dereg_mr and may be handed out again by a later reg_mr.
	 */
	sprintf(proc_name, "%X", mr->smr_info->mr_id);

	/*
	 * remove_proc_entry() checks internally that no operation was
	 * started on that proc fs file; if one is in progress the current
	 * process waits until it ends.  That is why no extra sync mechanism
	 * is needed when the shared umem is released afterwards.
	 */
	remove_proc_entry(proc_name, mlx4_mrs_dir_entry);

	kfree(mr->smr_info);
	mr->smr_info = NULL;
}
#endif
/*
 * Invalidation callback registered on the umem of a peer-memory MR.
 *
 * @invalidation_cookie is the mlx4_ib_mr the umem belongs to.  The first
 * party to bump mr->invalidated tears the MR down; if another party got
 * there first, only the in-flight flag is set on the umem context.
 * @addr and @size are not used.
 */
static void mlx4_invalidate_umem(void *invalidation_cookie,
				 struct ib_umem *umem,
				 unsigned long addr, size_t size)
{
	struct mlx4_ib_mr *mr = invalidation_cookie;

	/*
	 * This function is called under client peer lock so its resources
	 * are race protected.
	 */
	if (atomic_inc_return(&mr->invalidated) > 1) {
		/* somebody else already owns the teardown */
		umem->invalidation_ctx->inflight_invalidation = 1;
		return;
	}

	umem->invalidation_ctx->peer_callback = 1;
	mlx4_mr_free(to_mdev(mr->ibmr.device)->dev, &mr->mmr);
	ib_umem_release(umem);

	/* wake up a dereg that is waiting on invalidation_comp */
	complete(&mr->invalidation_comp);
}
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata,
@ -487,18 +529,20 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int shift;
int err;
int n;
struct ib_peer_memory_client *ib_peer_mem;
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length,
access_flags, 0);
mr->umem = ib_umem_get_ex(pd->uobject->context, start, length,
access_flags, 0, 1);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
ib_peer_mem = mr->umem->ib_peer_mem;
n = ib_umem_page_count(mr->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
&n);
@ -516,7 +560,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
#ifdef __linux__
/* No support for Shared MR */
#if 0
/* Check whether MR should be shared */
if (is_shared_mr(access_flags)) {
/* start address and length must be aligned to page size in order
@ -531,10 +576,32 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
}
#endif
if (ib_peer_mem) {
if (access_flags & IB_ACCESS_MW_BIND) {
/* Prevent binding MW on peer clients.
* mlx4_invalidate_umem must be void,
* therefore, mlx4_mr_free should not fail
* when using peer clients. */
err = -ENOSYS;
pr_err("MW is not supported with peer memory client");
goto err_smr;
}
init_completion(&mr->invalidation_comp);
ib_umem_activate_invalidation_notifier(mr->umem,
mlx4_invalidate_umem, mr);
}
atomic_set(&mr->invalidated, 0);
return &mr->ibmr;
err_smr:
/* No support for Shared MR */
#if 0
if (mr->smr_info)
free_smr_info(mr);
#endif
err_mr:
mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
err_umem:
ib_umem_release(mr->umem);
@ -545,41 +612,106 @@ err_free:
return ERR_PTR(err);
}
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
struct ib_umem *umem = mr->umem;
int ret;
mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (mr->smr_info) {
/* When master/parent shared mr is dereged there is
no ability to share this mr any more - its mr_id will be
returned to the kernel as part of ib_uverbs_dereg_mr
and may be allocated again as part of other reg_mr.
*/
char name_buff[16];
sprintf(name_buff, "%X", mr->smr_info->mr_id);
/* Remove proc entry is checking internally that no operation
was strated on that proc fs file and if in the middle
current process will wait till end of operation.
That's why no sync mechanism is needed when we release
below the shared umem.
*/
#ifdef __linux__
remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
kfree(mr->smr_info);
/* No support for Shared MR */
#if 0
if (mr->smr_info)
free_smr_info(mr);
#endif
if (atomic_inc_return(&mr->invalidated) > 1) {
wait_for_completion(&mr->invalidation_comp);
goto end;
}
if (mr->umem)
ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (ret) {
/* Error is not expected here, except when memory windows
* are bound to MR which is not supported with
* peer memory clients */
atomic_set(&mr->invalidated, 0);
return ret;
}
if (!umem)
goto end;
ib_umem_release(mr->umem);
end:
kfree(mr);
return 0;
}
/*
 * Allocate and enable a memory window of the given @type on @pd.
 *
 * Returns the embedded ib_mw (with rkey set from the hardware key) on
 * success, ERR_PTR(-ENOMEM) if the wrapper cannot be allocated, or
 * ERR_PTR() of the error reported by mlx4_mw_alloc()/mlx4_mw_enable().
 */
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
	struct mlx4_ib_dev *mdev = to_mdev(pd->device);
	struct mlx4_ib_mw *mw;
	int rc;

	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
	if (!mw)
		return ERR_PTR(-ENOMEM);

	rc = mlx4_mw_alloc(mdev->dev, to_mpd(pd)->pdn, (enum mlx4_mw_type)type, &mw->mmw);
	if (!rc) {
		rc = mlx4_mw_enable(mdev->dev, &mw->mmw);
		if (!rc) {
			mw->ibmw.rkey = mw->mmw.key;
			return &mw->ibmw;
		}

		/* enabling failed: undo the allocation */
		mlx4_mw_free(mdev->dev, &mw->mmw);
	}

	kfree(mw);
	return ERR_PTR(rc);
}
int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind)
{
struct ib_send_wr wr;
struct ib_send_wr *bad_wr;
int ret;
memset(&wr, 0, sizeof(wr));
wr.opcode = IB_WR_BIND_MW;
wr.wr_id = mw_bind->wr_id;
wr.send_flags = mw_bind->send_flags;
wr.wr.bind_mw.mw = mw;
wr.wr.bind_mw.bind_info = mw_bind->bind_info;
wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);
ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
if (!ret)
mw->rkey = wr.wr.bind_mw.rkey;
return ret;
}
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
struct mlx4_ib_mw *mw = to_mmw(ibmw);
mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
kfree(mw);
return 0;
}
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
{
@ -606,7 +738,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
return &mr->ibmr;
err_mr:
mlx4_mr_free(dev->dev, &mr->mmr);
(void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
kfree(mr);
@ -685,7 +817,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
return &fmr->ibfmr;
err_mr:
mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
(void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
err_free:
kfree(fmr);

View File

@ -45,13 +45,11 @@
#include <linux/mlx4/driver.h>
#include <linux/io.h>
#ifndef __linux__
#define asm __asm
#endif
#include "mlx4_ib.h"
#include "user.h"
#define asm __asm
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
};
@ -111,6 +109,8 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
[IB_WR_BIND_MW] = cpu_to_be32(
MLX4_OPCODE_BIND_MW),
};
#ifndef wc_wmb
@ -263,7 +263,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
/* Pad the remainder of the WQE with an inline data segment. */
if (size > s) {
inl = wqe + s;
inl->byte_count = cpu_to_be32(1U << 31 | (size - s - sizeof *inl));
inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
}
ctrl->srcrb_flags = 0;
ctrl->fence_size = size / 16;
@ -274,7 +274,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
wmb();
ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
(n & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0);
(n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
}
@ -573,6 +573,12 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(dev,
qp->sqp_proxy_rcv[i].map))) {
pr_warn("ib_dma_map_single failed\n");
kfree(qp->sqp_proxy_rcv[i].addr);
goto err;
}
}
return 0;
@ -602,15 +608,6 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
kfree(qp->sqp_proxy_rcv);
}
/*
 * Tell whether a QP created with @attr has its own receive queue.
 * XRC initiator/target QPs never do; any other type has one only when
 * no SRQ is attached.
 */
static int qp_has_rq(struct ib_qp_init_attr *attr)
{
	switch (attr->qp_type) {
	case IB_QPT_XRC_INI:
	case IB_QPT_XRC_TGT:
		return 0;
	default:
		return !attr->srq;
	}
}
#ifdef __linux__
static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp,
struct ib_qp_init_attr *attr, int *qpn)
{
@ -644,7 +641,7 @@ static int init_qpg_parent(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *pqp,
err = mlx4_ib_steer_qp_alloc(dev, tss_align_num, &tss_base);
else
err = mlx4_qp_reserve_range(dev->dev, tss_align_num,
tss_align_num, &tss_base, 1);
tss_align_num, &tss_base, MLX4_RESERVE_BF_QP);
if (err)
goto err1;
@ -791,7 +788,6 @@ static void free_qpg_qpn(struct mlx4_ib_qp *mqp, int qpn)
break;
}
}
#endif
static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
struct ib_qp_init_attr *attr, int *qpn)
@ -800,10 +796,12 @@ static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
switch (attr->qpg_type) {
case IB_QPG_NONE:
/* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
* BlueFlame setup flow wrongly causes VLAN insertion. */
/* Raw packet QPNs may not have bits 6,7 set in their qp_num;
* otherwise, the WQE BlueFlame setup flow wrongly causes
* VLAN insertion. */
if (attr->qp_type == IB_QPT_RAW_PACKET) {
err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn, 1);
err = mlx4_qp_reserve_range(dev->dev, 1, 1, qpn,
MLX4_RESERVE_BF_QP);
} else {
if(qp->flags & MLX4_IB_QP_NETIF)
err = mlx4_ib_steer_qp_alloc(dev, 1, qpn);
@ -812,15 +810,11 @@ static int alloc_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
}
break;
case IB_QPG_PARENT:
#ifdef __linux__
err = init_qpg_parent(dev, qp, attr, qpn);
#endif
break;
case IB_QPG_CHILD_TX:
case IB_QPG_CHILD_RX:
#ifdef __linux__
err = alloc_qpg_qpn(attr, qp, qpn);
#endif
break;
default:
qp->qpg_type = IB_QPG_NONE;
@ -844,15 +838,11 @@ static void free_qpn_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_release_range(dev->dev, qpn, 1);
break;
case IB_QPG_PARENT:
#ifdef __linux__
free_qpg_parent(dev, qp);
#endif
break;
case IB_QPG_CHILD_TX:
case IB_QPG_CHILD_RX:
#ifdef __linux__
free_qpg_qpn(qp, qpn);
#endif
break;
default:
break;
@ -881,10 +871,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct mlx4_ib_qp *qp;
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
#ifndef __linux__
init_attr->qpg_type = IB_QPG_NONE;
#endif
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
if (mlx4_is_mfunc(dev->dev) &&
@ -941,6 +927,23 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->mlx4_ib_qp_type = qp_type;
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (dev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED &&
!mlx4_is_mfunc(dev->dev))
qp->flags |= MLX4_IB_QP_NETIF;
else {
err = -EINVAL;
goto err;
}
}
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@ -952,7 +955,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, mlx4_ib_qp_has_rq(init_attr), qp);
if (err)
goto err;
@ -961,11 +964,20 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
int shift;
int n;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
if (!udata || ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
err = -EFAULT;
goto err;
}
if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
qp->flags |= MLX4_IB_QP_CAP_CROSS_CHANNEL;
if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
qp->flags |= MLX4_IB_QP_CAP_MANAGED_SEND;
if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
qp->flags |= MLX4_IB_QP_CAP_MANAGED_RECV;
qp->sq_no_prefetch = ucmd.sq_no_prefetch;
err = set_user_sq_size(dev, qp, &ucmd);
@ -990,7 +1002,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
if (qp_has_rq(init_attr)) {
if (mlx4_ib_qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
ucmd.db_addr, &qp->db);
if (err)
@ -999,23 +1011,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
} else {
qp->sq_no_prefetch = 0;
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP &&
dev->dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED &&
!mlx4_is_mfunc(dev->dev))
qp->flags |= MLX4_IB_QP_NETIF;
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
if (qp_has_rq(init_attr)) {
if (mlx4_ib_qp_has_rq(init_attr)) {
err = mlx4_db_alloc(dev->dev, &qp->db, 0);
if (err)
goto err;
@ -1097,7 +1097,7 @@ err_proxy:
free_proxy_bufs(pd->device, qp);
err_wrid:
if (pd->uobject) {
if (qp_has_rq(init_attr))
if (mlx4_ib_qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
kfree(qp->sq.wrid);
@ -1114,7 +1114,7 @@ err_buf:
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
if (!pd->uobject && qp_has_rq(init_attr))
if (!pd->uobject && mlx4_ib_qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
if (qp->max_inline_data)
@ -1145,7 +1145,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv
{
if (send_cq == recv_cq) {
spin_lock_irq(&send_cq->lock);
(void) __acquire(&recv_cq->lock);
__acquire(&recv_cq->lock);
} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_lock_irq(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
@ -1159,7 +1159,7 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re
__releases(&send_cq->lock) __releases(&recv_cq->lock)
{
if (send_cq == recv_cq) {
(void) __release(&recv_cq->lock);
__release(&recv_cq->lock);
spin_unlock_irq(&send_cq->lock);
} else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
spin_unlock(&recv_cq->lock);
@ -1300,14 +1300,14 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
#ifdef __linux__
static int check_qpg_attr(struct mlx4_ib_dev *dev,
struct ib_qp_init_attr *attr)
{
if (attr->qpg_type == IB_QPG_NONE)
return 0;
if (attr->qp_type != IB_QPT_UD)
if (attr->qp_type != IB_QPT_UD &&
attr->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
if (attr->qpg_type == IB_QPG_PARENT) {
@ -1346,7 +1346,6 @@ static int check_qpg_attr(struct mlx4_ib_dev *dev,
}
return 0;
}
#endif
#define RESERVED_FLAGS_MASK ((((unsigned int)IB_QP_CREATE_RESERVED_END - 1) | IB_QP_CREATE_RESERVED_END) \
& ~(IB_QP_CREATE_RESERVED_START - 1))
@ -1364,6 +1363,15 @@ static enum mlx4_ib_qp_flags to_mlx4_ib_qp_flags(enum ib_qp_create_flags ib_qp_f
if (ib_qp_flags & IB_QP_CREATE_NETIF_QP)
mlx4_ib_qp_flags |= MLX4_IB_QP_NETIF;
if (ib_qp_flags & IB_QP_CREATE_CROSS_CHANNEL)
mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_CROSS_CHANNEL;
if (ib_qp_flags & IB_QP_CREATE_MANAGED_SEND)
mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_MANAGED_SEND;
if (ib_qp_flags & IB_QP_CREATE_MANAGED_RECV)
mlx4_ib_qp_flags |= MLX4_IB_QP_CAP_MANAGED_RECV;
/* reserved flags */
mlx4_ib_qp_flags |= (ib_qp_flags & RESERVED_FLAGS_MASK);
@ -1387,6 +1395,9 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
* and only for kernel UD QPs.
*/
if (mlx4_qp_flags & ~(MLX4_IB_QP_LSO |
MLX4_IB_QP_CAP_CROSS_CHANNEL |
MLX4_IB_QP_CAP_MANAGED_SEND |
MLX4_IB_QP_CAP_MANAGED_RECV |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF))
@ -1397,19 +1408,30 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
if (init_attr->create_flags &&
(udata ||
((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) &&
if ((mlx4_qp_flags &
(MLX4_IB_QP_CAP_CROSS_CHANNEL |
MLX4_IB_QP_CAP_MANAGED_SEND |
MLX4_IB_QP_CAP_MANAGED_RECV)) &&
!(to_mdev(device)->dev->caps.flags &
MLX4_DEV_CAP_FLAG_CROSS_CHANNEL)) {
pr_debug("%s Does not support cross-channel operations\n",
to_mdev(device)->ib_dev.name);
return ERR_PTR(-EINVAL);
}
if ((init_attr->create_flags &
~(IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV)) &&
(((mlx4_qp_flags & ~MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type != IB_QPT_UD) ||
((mlx4_qp_flags & MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type > IB_QPT_GSI)))
return ERR_PTR(-EINVAL);
#ifdef __linux__
err = check_qpg_attr(to_mdev(device), init_attr);
if (err)
return ERR_PTR(err);
#endif
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
@ -1559,32 +1581,42 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
struct mlx4_ib_qp *qp, struct mlx4_qp_path *path,
u8 port, int is_primary)
/*
 * Convert an IB static rate into the mlx4 encoding.
 *
 * Returns 0 for IB_RATE_PORT_CURRENT and -EINVAL for a rate outside
 * [IB_RATE_2_5_GBPS, IB_RATE_300_GBPS].  Otherwise the rate is stepped
 * down until its bit is set in the device's stat_rate_support mask (or
 * IB_RATE_2_5_GBPS is reached), and rate + MLX4_STAT_RATE_OFFSET is
 * returned.
 */
static int ib_rate_to_mlx4(struct mlx4_ib_dev *dev, u8 rate)
{
	if (rate == IB_RATE_PORT_CURRENT)
		return 0;

	if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
		return -EINVAL;

	/* Fall back to the nearest lower rate the device advertises. */
	for (; rate != IB_RATE_2_5_GBPS; --rate) {
		if ((1 << (rate + MLX4_STAT_RATE_OFFSET)) &
		    dev->dev->caps.stat_rate_support)
			break;
	}

	return rate + MLX4_STAT_RATE_OFFSET;
}
static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
u8 *smac, u16 vlan_id, struct mlx4_ib_qp *qp,
struct mlx4_qp_path *path, u8 port, int is_primary)
{
struct net_device *ndev;
int err;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
IB_LINK_LAYER_ETHERNET;
u8 mac[6];
int is_mcast;
u16 vlan_tag;
int vidx;
int smac_index;
int err;
u64 u64_mac;
u8 *smac;
struct mlx4_roce_smac_vlan_info *smac_info;
path->grh_mylmc = ah->src_path_bits & 0x7f;
path->rlid = cpu_to_be16(ah->dlid);
if (ah->static_rate) {
path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << path->static_rate & dev->dev->caps.stat_rate_support))
--path->static_rate;
} else
path->static_rate = 0;
err = ib_rate_to_mlx4(dev, ah->static_rate);
if (err < 0)
return err;
path->static_rate = err;
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
@ -1614,7 +1646,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
else
smac_info = &qp->alt;
vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
vlan_tag = vlan_id;
if (vlan_tag < 0x1000) {
if (smac_info->vid < 0x1000) {
/* both valid vlan ids */
@ -1653,28 +1685,13 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
}
}
err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
if (err)
return err;
/* get smac_index for RoCE use.
* If no smac was yet assigned, register one.
* If one was already assigned, but the new mac differs,
* unregister the old one and register the new one.
*/
spin_lock(&dev->iboe.lock);
ndev = dev->iboe.netdevs[port - 1];
if (ndev) {
#ifdef __linux__
smac = ndev->dev_addr; /* fixme: cache this value */
#else
smac = IF_LLADDR(ndev); /* fixme: cache this value */
#endif
u64_mac = mlx4_mac_to_u64(smac);
} else
u64_mac = dev->dev->caps.def_mac[port];
spin_unlock(&dev->iboe.lock);
if (!smac_info->smac || smac_info->smac != u64_mac) {
/* register candidate now, unreg if needed, after success */
@ -1688,7 +1705,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
} else
smac_index = smac_info->smac_index;
memcpy(path->dmac, mac, 6);
memcpy(path->dmac, ah->dmac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* put MAC table smac index for IBoE */
path->grh_mylmc = (u8) (smac_index) | 0x80 ;
@ -1712,24 +1729,21 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, const u8 *smac,
struct mlx4_qp_context *context)
{
struct net_device *ndev;
u64 u64_mac;
u8 *smac;
int smac_index;
ndev = dev->iboe.netdevs[qp->port - 1];
if (ndev) {
#ifdef __linux__
smac = ndev->dev_addr; /* fixme: cache this value */
#else
smac = IF_LLADDR(ndev); /* fixme: cache this value */
#endif
smac = IF_LLADDR(ndev);
u64_mac = mlx4_mac_to_u64(smac);
} else
} else {
u64_mac = dev->dev->caps.def_mac[qp->port];
}
context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
if (!qp->pri.smac) {
@ -1783,6 +1797,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
if (qp->max_inlr_data)
context->param3 |= cpu_to_be32(1 << 25);
if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
@ -1834,12 +1851,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
if (dev->counters[qp->port - 1] != -1) {
if (dev->counters[qp->port - 1].counter_index != -1) {
context->pri_path.counter_index =
dev->counters[qp->port - 1];
dev->counters[qp->port - 1].counter_index;
optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
} else
} else {
context->pri_path.counter_index = 0xff;
}
if (qp->flags & MLX4_IB_QP_NETIF &&
(qp->qpg_type == IB_QPG_NONE || qp->qpg_type == IB_QPG_PARENT)) {
@ -1855,8 +1873,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
if (attr_mask & IB_QP_AV) {
if (mlx4_set_path(dev, &attr->ah_attr, qp, &context->pri_path,
if ((attr_mask & IB_QP_AV) && (ibqp->qp_type != IB_QPT_RAW_PACKET)) {
if (mlx4_set_path(dev, &attr->ah_attr, (u8 *)attr->smac,
attr_mask & IB_QP_VID ?
attr->vlan_id : 0xffff ,
qp, &context->pri_path,
attr_mask & IB_QP_PORT ?
attr->port_num : qp->port, 1))
goto out;
@ -1879,12 +1900,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
dev->dev->caps.pkey_table_len[attr->alt_port_num])
goto out;
if (mlx4_set_path(dev, &attr->alt_ah_attr, qp, &context->alt_path,
if (mlx4_set_path(dev, &attr->alt_ah_attr, (u8 *)attr->smac,
attr_mask & IB_QP_ALT_VID ?
attr->alt_vlan_id : 0xffff,
qp, &context->alt_path,
attr->alt_port_num, 0))
goto out;
context->alt_path.pkey_index = attr->alt_pkey_index;
context->alt_path.ackto = attr->alt_timeout << 3;
context->alt_path.counter_index = dev->counters[attr->alt_port_num - 1].counter_index;
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
@ -1943,6 +1968,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_M_EXT_CLASS_3)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_COLL_SYNC_RQ);
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->params2 |= (qp->flags & MLX4_IB_QP_CAP_CROSS_CHANNEL ?
cpu_to_be32(MLX4_QP_BIT_COLL_MASTER) : 0);
context->params2 |= (qp->flags & MLX4_IB_QP_CAP_MANAGED_SEND ?
cpu_to_be32(MLX4_QP_BIT_COLL_MASTER | MLX4_QP_BIT_COLL_SYNC_SQ) : 0);
context->params2 |= (qp->flags & MLX4_IB_QP_CAP_MANAGED_RECV ?
cpu_to_be32(MLX4_QP_BIT_COLL_MASTER | MLX4_QP_BIT_COLL_SYNC_RQ) : 0);
}
if (ibqp->srq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
@ -1997,6 +2031,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
if (ibqp->qp_type == IB_QPT_RAW_PACKET &&
(attr_mask & IB_QP_AV)) {
context->pri_path.sched_queue |=
((attr->ah_attr.sl & 0xf) << 3);
context->pri_path.feup = 1 << 6;
}
is_eth = rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET;
if (is_eth) {
@ -2007,13 +2047,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
err = handle_eth_ud_smac_index(dev, qp, context);
err = handle_eth_ud_smac_index(dev, qp, (const u8 *)attr->smac, context);
if (err)
return -EINVAL;
}
}
}
if (ibqp->qp_type == IB_QPT_UD)
if (is_eth && (new_state == IB_QPS_RTR)) {
context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
}
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
@ -2072,7 +2118,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
for (i = 0; i < qp->sq.wqe_cnt; ++i) {
ctrl = get_send_wqe(qp, i);
ctrl->owner_opcode = cpu_to_be32(1U << 31);
ctrl->owner_opcode = cpu_to_be32(1 << 31);
if (qp->sq_max_wqes_per_wr == 1)
ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4);
@ -2080,6 +2126,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
if ((qp->port && rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET) && (qp->ibqp.qp_type == IB_QPT_RAW_PACKET))
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
to_mlx4_state(new_state), context, optpar,
sqd_event, &qp->mqp);
@ -2268,14 +2319,22 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int ll;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
ll = IB_LINK_LAYER_UNSPECIFIED;
} else {
int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
ll = rdma_port_get_link_layer(&dev->ib_dev, port);
}
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask & ~IB_M_QP_MOD_VEND_MASK)) {
attr_mask & ~IB_M_QP_MOD_VEND_MASK, ll)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
@ -2299,11 +2358,6 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
(rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
IB_LINK_LAYER_ETHERNET))
goto out;
if (attr_mask & IB_QP_PKEY_INDEX) {
int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) {
@ -2421,11 +2475,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (header_size <= spc) {
inl->byte_count = cpu_to_be32(1U << 31 | header_size);
inl->byte_count = cpu_to_be32(1 << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
i = 1;
} else {
inl->byte_count = cpu_to_be32(1U << 31 | spc);
inl->byte_count = cpu_to_be32(1 << 31 | spc);
memcpy(inl + 1, sqp->header_buf, spc);
inl = (void *) (inl + 1) + spc;
@ -2444,7 +2498,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
* of 16 mod 64.
*/
wmb();
inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
i = 2;
}
@ -2470,7 +2524,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
int is_eth;
int is_vlan = 0;
int is_grh;
u16 vlan = 0;
u16 uninitialized_var(vlan);
int err = 0;
send_size = 0;
@ -2497,8 +2551,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
return err;
}
vlan = rdma_get_vlan_id(&sgid);
is_vlan = vlan < 0x1000;
if (is_eth && ah->av.eth.vlan != 0xffff) {
vlan = cpu_to_be16(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
@ -2565,7 +2621,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
}
if (is_eth) {
u8 smac[6];
u8 *smac;
struct in6_addr in6;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
@ -2577,8 +2633,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
rdma_get_ll_mac(&in6, smac);
if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
smac = IF_LLADDR(to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]);
else
smac = ah->av.eth.s_mac; /* use the src mac of the tunnel */
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
@ -2628,11 +2689,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (header_size <= spc) {
inl->byte_count = cpu_to_be32(1U << 31 | header_size);
inl->byte_count = cpu_to_be32(1 << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
i = 1;
} else {
inl->byte_count = cpu_to_be32(1U << 31 | spc);
inl->byte_count = cpu_to_be32(1 << 31 | spc);
memcpy(inl + 1, sqp->header_buf, spc);
inl = (void *) (inl + 1) + spc;
@ -2651,7 +2712,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
* of 16 mod 64.
*/
wmb();
inl->byte_count = cpu_to_be32(1U << 31 | (header_size - spc));
inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
i = 2;
}
@ -2679,9 +2740,12 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq
static __be32 convert_access(int acc)
{
return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
(acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
(acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
return (acc & IB_ACCESS_REMOTE_ATOMIC ?
cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) |
(acc & IB_ACCESS_REMOTE_WRITE ?
cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) |
(acc & IB_ACCESS_REMOTE_READ ?
cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) |
cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
}
@ -2707,6 +2771,24 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
fseg->reserved[1] = 0;
}
/*
 * Fill the bind segment @bseg of a WQE from the bind_mw part of the
 * work request @wr.
 *
 * flags1 keeps only the remote read / remote write / atomic permission
 * bits produced by convert_access(); flags2 records whether the window
 * is type 2 and whether zero-based addressing was requested.
 */
static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
{
	/* Region being bound: address range and the keys involved. */
	bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
	bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
	bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
	bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);

	/* Only the remote permissions are carried over to the window. */
	bseg->flags1 =
		convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
		cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
			    MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
			    MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);

	bseg->flags2 = (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) ?
		cpu_to_be32(MLX4_WQE_BIND_TYPE_2) : 0;
	if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
		bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
}
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
iseg->mem_key = cpu_to_be32(rkey);
@ -2792,23 +2874,25 @@ static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_
hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
memcpy(hdr.mac, ah->av.eth.mac, 6);
hdr.vlan = cpu_to_be16(ah->av.eth.vlan);
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (sizeof (hdr) <= spc) {
memcpy(inl + 1, &hdr, sizeof (hdr));
wmb();
inl->byte_count = cpu_to_be32(1U << 31 | sizeof (hdr));
inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
i = 1;
} else {
memcpy(inl + 1, &hdr, spc);
wmb();
inl->byte_count = cpu_to_be32(1U << 31 | spc);
inl->byte_count = cpu_to_be32(1 << 31 | spc);
inl = (void *) (inl + 1) + spc;
memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
wmb();
inl->byte_count = cpu_to_be32(1U << 31 | (sizeof (hdr) - spc));
inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
i = 2;
}
@ -2833,7 +2917,7 @@ static void set_mlx_icrc_seg(void *dseg)
*/
wmb();
iseg->byte_count = cpu_to_be32((1U << 31) | 4);
iseg->byte_count = cpu_to_be32((1 << 31) | 4);
}
static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
@ -2901,7 +2985,7 @@ static void add_zero_len_inline(void *wqe)
{
struct mlx4_wqe_inline_seg *inl = wqe;
memset(wqe, 0, 16);
inl->byte_count = cpu_to_be32(1U << 31);
inl->byte_count = cpu_to_be32(1 << 31);
}
static int lay_inline_data(struct mlx4_ib_qp *qp, struct ib_send_wr *wr,
@ -3102,6 +3186,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
break;
case IB_WR_BIND_MW:
ctrl->srcrb_flags |=
cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
set_bind_seg(wqe, wr);
wqe += sizeof(struct mlx4_wqe_bind_seg);
size += sizeof(struct mlx4_wqe_bind_seg) / 16;
default:
/* No extra segments required for sends */
break;
@ -3246,14 +3336,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*/
wmb();
if (wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
*bad_wr = wr;
err = -EINVAL;
goto out;
}
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1U << 31) : 0) | blh;
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
stamp = ind + qp->sq_spare_wqes;
ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
@ -3576,6 +3666,15 @@ done:
qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
if (qp->flags & MLX4_IB_QP_CAP_CROSS_CHANNEL)
qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
if (qp->flags & MLX4_IB_QP_CAP_MANAGED_SEND)
qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
if (qp->flags & MLX4_IB_QP_CAP_MANAGED_RECV)
qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
qp_init_attr->qpg_type = ibqp->qpg_type;
if (ibqp->qpg_type == IB_QPG_PARENT)
qp_init_attr->cap.qpg_tss_mask_sz = qp->qpg_data->qpg_tss_mask_sz;
@ -3586,4 +3685,3 @@ out:
mutex_unlock(&qp->mutex);
return err;
}

View File

@ -56,8 +56,8 @@ static ssize_t show_admin_alias_guid(struct device *dev,
record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
return sprintf(buf, "%llx\n", (long long)
be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
return sprintf(buf, "%llx\n",
(long long)be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[8 * guid_index_in_rec]));

View File

@ -672,8 +672,8 @@ static int mthca_destroy_qp(struct ib_qp *qp)
return 0;
}
static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
int comp_vector,
static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
@ -681,6 +681,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
struct mthca_cq *cq;
int nent;
int err;
int entries = attr->cqe;
if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
return ERR_PTR(-EINVAL);
@ -1010,12 +1011,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata, int mr_id)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
struct scatterlist *sg;
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
int shift, n, len;
int i, j, k;
int i, k, entry;
int err = 0;
int write_mtt_size;
@ -1044,10 +1045,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
shift = ffs(mr->umem->page_size) - 1;
n = 0;
list_for_each_entry(chunk, &mr->umem->chunk_list, list)
n += chunk->nents;
n = mr->umem->nmap;;
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
err = PTR_ERR(mr->mtt);
@ -1064,25 +1062,27 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
list_for_each_entry(chunk, &mr->umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
mr->umem->page_size * k;
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
*/
if (i == write_mtt_size) {
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
if (err)
goto mtt_done;
n += i;
i = 0;
}
for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
len = sg_dma_len(sg) >> shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
mr->umem->page_size * k;
/*
* Be friendly to write_mtt and pass it chunks
* of appropriate size.
*/
if (i == write_mtt_size) {
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
if (err)
goto mtt_done;
n += i;
i = 0;
}
}
}
if (i)
err = mthca_write_mtt(dev, mr->mtt, n, pages, i);

View File

@ -870,7 +870,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,

View File

@ -1,11 +0,0 @@
obj-$(CONFIG_INFINIBAND_IPOIB) += ib_ipoib.o
ib_ipoib-y := ipoib_main.o \
ipoib_ib.o \
ipoib_multicast.o \
ipoib_verbs.o \
ipoib_vlan.o \
ipoib_ethtool.o
ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o

View File

@ -80,6 +80,7 @@
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <asm/atomic.h>
@ -313,6 +314,7 @@ struct ipoib_ethtool_st {
*/
struct ipoib_dev_priv {
spinlock_t lock;
spinlock_t drain_lock;
struct ifnet *dev;

View File

@ -383,6 +383,7 @@ ipoib_poll(struct ipoib_dev_priv *priv)
int n, i;
poll_more:
spin_lock(&priv->drain_lock);
for (;;) {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
@ -401,6 +402,7 @@ poll_more:
if (n != IPOIB_NUM_WC)
break;
}
spin_unlock(&priv->drain_lock);
if (ib_req_notify_cq(priv->recv_cq,
IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS))
@ -707,6 +709,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv)
{
int i, n;
spin_lock(&priv->drain_lock);
do {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
@ -727,6 +730,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv)
ipoib_ib_handle_rx_wc(priv, priv->ibwc + i);
}
} while (n == IPOIB_NUM_WC);
spin_unlock(&priv->drain_lock);
spin_lock(&priv->lock);
while (ipoib_poll_tx(priv))

View File

@ -832,6 +832,7 @@ ipoib_priv_alloc(void)
priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
spin_lock_init(&priv->lock);
spin_lock_init(&priv->drain_lock);
mutex_init(&priv->vlan_mutex);
INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs);

View File

@ -466,12 +466,20 @@ void ipoib_mcast_join_task(struct work_struct *work)
struct ipoib_dev_priv *priv =
container_of(work, struct ipoib_dev_priv, mcast_task.work);
struct ifnet *dev = priv->dev;
struct ib_port_attr attr;
ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);
if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
return;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "%s: port state is not ACTIVE (state = %d) suspend task.\n",
__func__, attr.state);
return;
}
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_query_gid() failed\n");
else

View File

@ -31,17 +31,20 @@
* SOFTWARE.
*/
#if !defined(IB_ADDR_H)
#ifndef IB_ADDR_H
#define IB_ADDR_H
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
#include <linux/if_vlan.h>
#include <net/if_inet6.h>
#include <net/ipv6.h>
struct rdma_addr_client {
atomic_t refcount;
@ -72,7 +75,8 @@ struct rdma_dev_addr {
* rdma_translate_ip - Translate a local IP address to an RDMA hardware
* address.
*/
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr);
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
u16 *vlan_id);
/**
* rdma_resolve_ip - Resolve source and destination IP addresses to
@ -101,6 +105,9 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
u16 *vlan_id);
static inline int ip_addr_size(struct sockaddr *addr)
{
@ -130,50 +137,56 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
}
/*
 * Fill @gid with a link-local GID derived from an Ethernet MAC and VLAN id.
 *
 * Layout written into gid->raw[0..15]:
 *   [0..1]   fe 80 (remaining prefix bytes stay zero)
 *   [8..10]  mac[0..2], with bit 0x02 of mac[0] flipped
 *   [11..12] the VLAN id (high byte, low byte) when vid < 0x1000,
 *            otherwise ff fe
 *   [13..15] mac[3..5]
 */
static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid)
{
	const int tagged = vid < 0x1000;

	memset(gid->raw, 0, 16);

	/* Same bytes as a big-endian 0xfe800000 word; bytes 2..3 are already 0. */
	gid->raw[0] = 0xfe;
	gid->raw[1] = 0x80;

	gid->raw[12] = tagged ? (vid & 0xff) : 0xfe;
	gid->raw[11] = tagged ? (vid >> 8) : 0xff;

	memcpy(gid->raw + 13, mac + 3, 3);
	memcpy(gid->raw + 8, mac, 3);
	gid->raw[8] ^= 2;
}
/*
 * Return the VLAN id associated with @dev, or 0xffff when none can be
 * obtained (flag IFF_802_1Q_VLAN clear on Linux, VLAN_TAG() reporting a
 * non-zero status otherwise).
 */
static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
{
#ifdef __linux__
	if (dev->priv_flags & IFF_802_1Q_VLAN)
		return vlan_dev_vlan_id(dev);
	return 0xffff;
#else
	uint16_t vid;

	if (VLAN_TAG(__DECONST(struct ifnet *, dev), &vid) == 0)
		return vid;
	return 0xffff;
#endif
}
/*
 * Encode an IP socket address as a GID.
 *
 * AF_INET addresses are stored in IPv4-mapped form via
 * ipv6_addr_set_v4mapped(); AF_INET6 addresses are copied as their 16 raw
 * bytes.  Returns 0 on success and -EINVAL for any other address family,
 * in which case @gid is left untouched.
 */
static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
{
	if (addr->sa_family == AF_INET) {
		struct sockaddr_in *sin = (struct sockaddr_in *)addr;

		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr,
		    (struct in6_addr *)gid);
		return 0;
	}

	if (addr->sa_family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

		memcpy(gid->raw, &sin6->sin6_addr, 16);
		return 0;
	}

	return -EINVAL;
}
/*
 * Convert a GID back into a socket address.
 *
 * A GID holding an IPv4-mapped IPv6 address produces a sockaddr_in built
 * from its last four bytes; any other GID is copied verbatim into a
 * sockaddr_in6.  The length and family members are filled in for both
 * results (previously sin6_len was left zero in the IPv6 case, unlike
 * sin_len in the IPv4 case).  Always returns 0.
 *
 * Important - @out should be a union of sockaddr_in and sockaddr_in6:
 * sizeof(struct sockaddr_in) or sizeof(struct sockaddr_in6) bytes are
 * written through it depending on the GID contents.
 */
static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
{
	if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
		struct sockaddr_in *sin = (struct sockaddr_in *)out;

		memset(sin, 0, sizeof(*sin));
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		/* The IPv4 address occupies the last 4 bytes of the GID. */
		memcpy(&sin->sin_addr.s_addr, gid->raw + 12, 4);
	} else {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)out;

		memset(sin6, 0, sizeof(*sin6));
		/* Keep the length member in step with the IPv4 branch. */
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		memcpy(&sin6->sin6_addr.s6_addr, gid->raw, 16);
	}
	return 0;
}
/*
 * Derive the source GID for @dev_addr from its source MAC and the VLAN id
 * of the interface it is bound to (0xffff when that interface cannot be
 * looked up).
 *
 * This func is called only in loopback ip address (127.0.0.1)
 * case in which sgid is not relevant
 */
static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
    union ib_gid *gid)
{
	struct net_device *ndev;
	u16 vlan;

	ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
	if (ndev == NULL) {
		vlan = 0xffff;
	} else {
		vlan = rdma_vlan_dev_vlan_id(ndev);
		/* Drop the reference taken by dev_get_by_index(). */
		dev_put(ndev);
	}

	iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vlan);
}
static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
@ -223,27 +236,6 @@ static inline enum ib_mtu iboe_get_mtu(int mtu)
return 0;
}
#ifdef __linux__
static inline int iboe_get_rate(struct net_device *dev)
{
struct ethtool_cmd cmd;
if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings ||
dev->ethtool_ops->get_settings(dev, &cmd))
return IB_RATE_PORT_CURRENT;
if (cmd.speed >= 40000)
return IB_RATE_40_GBPS;
else if (cmd.speed >= 30000)
return IB_RATE_30_GBPS;
else if (cmd.speed >= 20000)
return IB_RATE_20_GBPS;
else if (cmd.speed >= 10000)
return IB_RATE_10_GBPS;
else
return IB_RATE_PORT_CURRENT;
}
#else
static inline int iboe_get_rate(struct net_device *dev)
{
if (dev->if_baudrate >= IF_Gbps(40))
@ -257,11 +249,10 @@ static inline int iboe_get_rate(struct net_device *dev)
else
return IB_RATE_PORT_CURRENT;
}
#endif
static inline int rdma_link_local_addr(struct in6_addr *addr)
{
if (addr->s6_addr32[0] == cpu_to_be32(0xfe800000) &&
if (addr->s6_addr32[0] == htonl(0xfe800000) &&
addr->s6_addr32[1] == 0)
return 1;
@ -280,6 +271,20 @@ static inline int rdma_is_multicast_addr(struct in6_addr *addr)
return addr->s6_addr[0] == 0xff;
}
/*
 * Map a multicast IPv6 address onto an Ethernet multicast MAC.
 *
 * Addresses whose first byte is not 0xff are ignored and @mac is left
 * untouched.  When DUAL_MODE_MCAST_MAC is defined, addresses whose second
 * byte is 0x0e are treated as IPv4 and mapped with ip_eth_mc_map() using the
 * last 32-bit word; everything else goes through ipv6_eth_mc_map().
 */
static inline void resolve_mcast_mac(struct in6_addr *addr, u8 *mac)
{
	if (addr->s6_addr[0] == 0xff) {
#ifdef DUAL_MODE_MCAST_MAC
		if (addr->s6_addr[1] == 0x0e) {	/* IPv4 */
			ip_eth_mc_map(addr->s6_addr32[3], mac);
			return;
		}
#endif
		ipv6_eth_mc_map(addr, mac);
	}
}
static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
{
int i;
@ -300,12 +305,7 @@ static inline u16 rdma_get_vlan_id(union ib_gid *dgid)
static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev)
{
#ifdef __linux__
return dev->priv_flags & IFF_802_1Q_VLAN ?
vlan_dev_real_dev(dev) : 0;
#else
return VLAN_TRUNKDEV(__DECONST(struct ifnet *, dev));
#endif
}
#endif /* IB_ADDR_H */

View File

@ -100,6 +100,22 @@ int ib_find_cached_pkey(struct ib_device *device,
u16 pkey,
u16 *index);
/**
* ib_find_exact_cached_pkey - Returns the PKey table index where a specified
* PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit)
* @device: The device to query.
* @port_num: The port number of the device to search for the PKey.
* @pkey: The PKey value to search for.
* @index: The index into the cached PKey table where the PKey was found.
*
* ib_find_exact_cached_pkey() searches the specified PKey table in
* the local software cache.
*/
int ib_find_exact_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
u16 *index);
/**
* ib_get_cached_lmc - Returns a cached lmc table entry
* @device: The device to query.

View File

@ -497,7 +497,7 @@ int ib_send_cm_rej(struct ib_cm_id *cm_id,
* message.
* @cm_id: Connection identifier associated with the connection message.
* @service_timeout: The lower 5-bits specify the maximum time required for
* the sender to reply to to the connection message. The upper 3-bits
* the sender to reply to the connection message. The upper 3-bits
* specify additional control flags.
* @private_data: Optional user-defined private data sent with the
* message receipt acknowledgement.
@ -601,4 +601,6 @@ struct ib_cm_sidr_rep_param {
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_param *param);
int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
#endif /* IB_CM_H */

View File

@ -77,6 +77,15 @@
#define IB_MGMT_MAX_METHODS 128
/*
 * MAD Status field values (common status bits of the MAD header).
 *
 * BUSY (bit 0) and REDIRECT_REQD (bit 1) are single-bit flags.  The other
 * non-zero constants are values of the 3-bit "invalid field" code carried in
 * bits 2-4, already shifted into place (code << 2): 0x0004 = code 1,
 * 0x0008 = code 2, 0x000c = code 3, 0x001c = code 7.  They are therefore not
 * independent bit masks -- compare them against the masked code field rather
 * than testing them with a bitwise AND (0x000c is NOT
 * BAD_VERSION | UNSUPPORTED_METHOD).
 */
#define IB_MGMT_MAD_STATUS_SUCCESS 0x0000
#define IB_MGMT_MAD_STATUS_BUSY 0x0001
#define IB_MGMT_MAD_STATUS_REDIRECT_REQD 0x0002
#define IB_MGMT_MAD_STATUS_BAD_VERSION 0x0004
#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD 0x0008
#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB 0x000c
#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE 0x001c
/* RMPP information */
#define IB_MGMT_RMPP_VERSION 1

View File

@ -263,7 +263,5 @@ int ib_ud_header_pack(struct ib_ud_header *header,
int ib_ud_header_unpack(void *buf,
struct ib_ud_header *header);
int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf);
int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh);
#endif /* IB_PACK_H */

View File

@ -0,0 +1,59 @@
#if !defined(IB_PEER_MEM_H)
#define IB_PEER_MEM_H
#include <rdma/peer_mem.h>
struct invalidation_ctx;
struct ib_ucontext;
/*
 * Event counters kept for one peer memory client; embedded as the `stats`
 * member of struct ib_peer_memory_client.
 *
 * NOTE(review): the code that updates these counters is not in this header;
 * the per-field notes are inferred from the names only -- confirm against
 * the implementation.
 */
struct ib_peer_memory_statistics {
/* presumably: memory regions allocated on peer memory */
unsigned long num_alloc_mrs;
/* presumably: memory regions released again */
unsigned long num_dealloc_mrs;
/* presumably: pages registered */
unsigned long num_reg_pages;
/* presumably: pages deregistered */
unsigned long num_dereg_pages;
/* presumably: free/invalidate callbacks received from the peer */
unsigned long num_free_callbacks;
};
/*
 * Core-side bookkeeping for one peer memory client.
 *
 * Wraps the client's read-only `struct peer_memory_client` descriptor
 * together with list linkage, a ticket list, statistics and the
 * kobject/attribute group members.  The FreeBSD build replaces the Linux
 * SRCU member with a hold count, a wakeup flag and a condition variable.
 */
struct ib_peer_memory_client {
/* descriptor supplied by the peer; never modified through this pointer */
const struct peer_memory_client *peer_mem;
/* list linkage; NOTE(review): presumably on a global list of clients */
struct list_head core_peer_list;
/* list head; NOTE(review): presumably of struct core_ticket entries */
struct list_head core_ticket_list;
/* NOTE(review): presumably the most recently issued ticket key -- confirm */
unsigned long last_ticket;
#ifdef __FreeBSD__
/* FreeBSD substitute for peer_srcu below */
int holdcount;
int needwakeup;
struct cv peer_cv;
#else
struct srcu_struct peer_srcu;
#endif
/* NOTE(review): which members this mutex protects is not visible here */
struct mutex lock;
struct kobject *kobj;
struct attribute_group peer_mem_attr_group;
/* counters, see struct ib_peer_memory_statistics */
struct ib_peer_memory_statistics stats;
};
/*
 * Association between an unsigned long key and an opaque context pointer.
 * ib_peer_insert_context() takes a context and returns an unsigned long;
 * ib_peer_remove_context() and ib_peer_search_context() take such a key,
 * the latter returning a struct core_ticket pointer.
 */
struct core_ticket {
/* lookup key */
unsigned long key;
/* opaque pointer stored with the key */
void *context;
/* list linkage; NOTE(review): presumably on ib_peer_memory_client.core_ticket_list */
struct list_head ticket_list;
};
struct ib_peer_memory_client *ib_get_peer_client(struct ib_ucontext *context, unsigned long addr,
size_t size, void **peer_client_context,
int *srcu_key);
void ib_put_peer_client(struct ib_peer_memory_client *ib_peer_client,
void *peer_client_context,
int srcu_key);
unsigned long ib_peer_insert_context(struct ib_peer_memory_client *ib_peer_client,
void *context);
int ib_peer_remove_context(struct ib_peer_memory_client *ib_peer_client,
unsigned long key);
struct core_ticket *ib_peer_search_context(struct ib_peer_memory_client *ib_peer_client,
unsigned long key);
#endif

View File

@ -154,6 +154,9 @@ struct ib_sa_path_rec {
u8 packet_life_time_selector;
u8 packet_life_time;
u8 preference;
u8 smac[ETH_ALEN];
u8 dmac[6];
__be16 vlan_id;
};
#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0)
@ -251,127 +254,6 @@ struct ib_sa_service_rec {
u64 data64[2];
};
enum {
IB_SA_EVENT_TYPE_FATAL = 0x0,
IB_SA_EVENT_TYPE_URGENT = 0x1,
IB_SA_EVENT_TYPE_SECURITY = 0x2,
IB_SA_EVENT_TYPE_SM = 0x3,
IB_SA_EVENT_TYPE_INFO = 0x4,
IB_SA_EVENT_TYPE_EMPTY = 0x7F,
IB_SA_EVENT_TYPE_ALL = 0xFFFF
};
enum {
IB_SA_EVENT_PRODUCER_TYPE_CA = 0x1,
IB_SA_EVENT_PRODUCER_TYPE_SWITCH = 0x2,
IB_SA_EVENT_PRODUCER_TYPE_ROUTER = 0x3,
IB_SA_EVENT_PRODUCER_TYPE_CLASS_MANAGER = 0x4,
IB_SA_EVENT_PRODUCER_TYPE_ALL = 0xFFFFFF
};
enum {
IB_SA_SM_TRAP_GID_IN_SERVICE = 64,
IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65,
IB_SA_SM_TRAP_CREATE_MC_GROUP = 66,
IB_SA_SM_TRAP_DELETE_MC_GROUP = 67,
IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128,
IB_SA_SM_TRAP_LINK_INTEGRITY = 129,
IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130,
IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131,
IB_SA_SM_TRAP_BAD_M_KEY = 256,
IB_SA_SM_TRAP_BAD_P_KEY = 257,
IB_SA_SM_TRAP_BAD_Q_KEY = 258,
IB_SA_SM_TRAP_SWITCH_BAD_P_KEY = 259,
IB_SA_SM_TRAP_ALL = 0xFFFF
};
struct ib_sa_inform {
union ib_gid gid;
__be16 lid_range_begin;
__be16 lid_range_end;
u8 is_generic;
u8 subscribe;
__be16 type;
union {
struct {
__be16 trap_num;
__be32 qpn;
u8 resp_time;
__be32 producer_type;
} generic;
struct {
__be16 device_id;
__be32 qpn;
u8 resp_time;
__be32 vendor_id;
} vendor;
} trap;
};
struct ib_sa_notice {
u8 is_generic;
u8 type;
union {
struct {
__be32 producer_type;
__be16 trap_num;
} generic;
struct {
__be32 vendor_id;
__be16 device_id;
} vendor;
} trap;
__be16 issuer_lid;
__be16 notice_count;
u8 notice_toggle;
/*
* Align data 16 bits off 64 bit field to match InformInfo definition.
* Data contained within this field will then align properly.
* See IB spec 1.2, sections 13.4.8.2 and 14.2.5.1.
*/
u8 reserved[5];
u8 data_details[54];
union ib_gid issuer_gid;
};
/*
* SM notice data details for:
*
* IB_SA_SM_TRAP_GID_IN_SERVICE = 64
* IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65
* IB_SA_SM_TRAP_CREATE_MC_GROUP = 66
* IB_SA_SM_TRAP_DELETE_MC_GROUP = 67
*/
struct ib_sa_notice_data_gid {
u8 reserved[6];
u8 gid[16];
u8 padding[32];
};
/*
* SM notice data details for:
*
* IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128
*/
struct ib_sa_notice_data_port_change {
__be16 lid;
u8 padding[52];
};
/*
* SM notice data details for:
*
* IB_SA_SM_TRAP_LINK_INTEGRITY = 129
* IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130
* IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131
*/
struct ib_sa_notice_data_port_error {
u8 reserved[2];
__be16 lid;
u8 port_num;
u8 padding[49];
};
#define IB_SA_GUIDINFO_REC_LID IB_SA_COMP_MASK(0)
#define IB_SA_GUIDINFO_REC_BLOCK_NUM IB_SA_COMP_MASK(1)
#define IB_SA_GUIDINFO_REC_RES1 IB_SA_COMP_MASK(2)
@ -528,56 +410,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
*/
void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
struct ib_inform_info {
void *context;
int (*callback)(int status,
struct ib_inform_info *info,
struct ib_sa_notice *notice);
u16 trap_number;
};
/**
* ib_sa_register_inform_info - Registers to receive notice events.
* @device: Device associated with the registration.
* @port_num: Port on the specified device to associate with the registration.
* @trap_number: InformInfo trap number to register for.
* @gfp_mask: GFP mask for memory allocations.
* @callback: User callback invoked once the registration completes and to
* report noticed events.
* @context: User specified context stored with the ib_inform_reg structure.
*
* This call initiates a registration request with the SA for the specified
* trap number. If the operation is started successfully, it returns
* an ib_inform_info structure that is used to track the registration operation.
* Users must free this structure by calling ib_unregister_inform_info,
* even if the operation later fails. (The callback status is non-zero.)
*
* If the registration fails; status will be non-zero. If the registration
* succeeds, the callback status will be zero, but the notice parameter will
* be NULL. If the notice parameter is not NULL, a trap or notice is being
* reported to the user.
*
* A status of -ENETRESET indicates that an error occurred which requires
* reregisteration.
*/
struct ib_inform_info *
ib_sa_register_inform_info(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
u16 trap_number, gfp_t gfp_mask,
int (*callback)(int status,
struct ib_inform_info *info,
struct ib_sa_notice *notice),
void *context);
/**
* ib_sa_unregister_inform_info - Releases an InformInfo registration.
* @info: InformInfo registration tracking structure.
*
* This call blocks until the registration request is destroyed. It may
* not be called from within the registration callback.
*/
void ib_sa_unregister_inform_info(struct ib_inform_info *info);
/* Support GuidInfoRecord */
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
@ -588,6 +421,4 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context),
void *context,
struct ib_sa_query **sa_query);
#endif /* IB_SA_H */

View File

@ -37,9 +37,26 @@
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/dma-attrs.h>
#include <linux/completion.h>
#include <rdma/ib_peer_mem.h>
struct ib_ucontext;
struct vm_area_struct;
struct ib_umem;
/*
 * Callback type used to report invalidation of a umem range: receives the
 * registered cookie, the umem, and the address/size of the range.
 * Installed with ib_umem_activate_invalidation_notifier().
 */
typedef void (*umem_invalidate_func_t)(void *invalidation_cookie,
struct ib_umem *umem,
unsigned long addr, size_t size);
/*
 * Invalidation state attached to a umem (see the `invalidation_ctx` pointer
 * in struct ib_umem).
 *
 * NOTE(review): the flag semantics below are inferred from the member names;
 * the code that sets and tests them is not in this header -- confirm.
 */
struct invalidation_ctx {
/* umem this context belongs to */
struct ib_umem *umem;
/* callback and the cookie passed back to it */
umem_invalidate_func_t func;
void *cookie;
/* NOTE(review): presumably the key returned by ib_peer_insert_context() */
unsigned long context_ticket;
/* presumably: non-zero while running inside a peer callback */
int peer_callback;
/* presumably: non-zero while an invalidation is in progress */
int inflight_invalidation;
/* presumably: non-zero once the peer has invalidated the memory */
int peer_invalidated;
/* completion object; NOTE(review): presumably signalled when invalidation finishes */
struct completion comp;
};
struct ib_umem {
struct ib_ucontext *context;
@ -48,55 +65,29 @@ struct ib_umem {
int page_size;
int writable;
int hugetlb;
struct list_head chunk_list;
#ifdef __linux__
struct work_struct work;
struct mm_struct *mm;
#else
unsigned long start;
#endif
unsigned long diff;
};
struct ib_cmem {
struct ib_ucontext *context;
size_t length;
/* Link list of contiguous blocks being part of that cmem */
struct list_head ib_cmem_block;
/* Order of cmem block, 2^ block_order will equal number
of physical pages per block
*/
unsigned long block_order;
/* Refernce counter for that memory area
- When value became 0 pages will be returned to the kernel.
*/
struct kref refcount;
};
struct ib_umem_chunk {
struct list_head list;
int nents;
unsigned long start;
struct sg_table sg_head;
int nmap;
struct dma_attrs attrs;
struct scatterlist page_list[0];
int npages;
/* peer memory that manages this umem*/
struct ib_peer_memory_client *ib_peer_mem;
struct invalidation_ctx *invalidation_ctx;
int peer_mem_srcu_key;
/* peer memory private context */
void *peer_mem_client_context;
};
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync);
struct ib_umem *ib_umem_get_ex(struct ib_ucontext *context, unsigned long addr,
size_t size, int access, int dmasync,
int invalidation_supported);
void ib_umem_activate_invalidation_notifier(struct ib_umem *umem,
umem_invalidate_func_t func,
void *cookie);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
struct vm_area_struct *vma);
struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
unsigned long total_size,
unsigned long page_size_order);
void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem);
int ib_umem_map_to_vma(struct ib_umem *umem,
struct vm_area_struct *vma);
#endif /* IB_UMEM_H */

View File

@ -43,6 +43,13 @@
* compatibility are made.
*/
#define IB_USER_VERBS_ABI_VERSION 6
#define IB_USER_VERBS_CMD_THRESHOLD 50
/*
* To support 6 legacy commands using the old extension style
*/
#define IB_USER_VERBS_LEGACY_CMD_FIRST 52
#define IB_USER_VERBS_LEGACY_EX_CMD_LAST 56
enum {
IB_USER_VERBS_CMD_GET_CONTEXT,
@ -85,17 +92,15 @@ enum {
IB_USER_VERBS_CMD_OPEN_XRCD,
IB_USER_VERBS_CMD_CLOSE_XRCD,
IB_USER_VERBS_CMD_CREATE_XSRQ,
IB_USER_VERBS_CMD_OPEN_QP,
IB_USER_VERBS_CMD_ATTACH_FLOW,
IB_USER_VERBS_CMD_DETACH_FLOW,
IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP,
IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP,
IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP,
IB_USER_VERBS_CMD_REG_XRC_RCV_QP,
IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP,
IB_USER_VERBS_CMD_OPEN_QP
};
enum {
IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
IB_USER_VERBS_EX_CMD_DESTROY_FLOW
};
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
@ -125,12 +130,33 @@ struct ib_uverbs_comp_event_desc {
* the rest of the command struct based on these value.
*/
/*
 * IBV_RESP_TO_VERBS_RESP_EX_RAW - locate the data that follows @field.
 * @ex_ptr:   pointer to an extended response of type @ex_type
 * @ex_type:  struct type of *@ex_ptr
 * @ibv_type: type to view the trailing data as
 * @field:    member of @ex_type after which the @ibv_type data starts
 *
 * Evaluates to an (ibv_type *) pointing at the first byte past
 * @ex_ptr->@field.  The byte offset is applied to a char pointer: pointer
 * arithmetic on void * is a GNU extension and is not valid ISO C, which
 * matters when this header is compiled outside the kernel.
 */
#define IBV_RESP_TO_VERBS_RESP_EX_RAW(ex_ptr, ex_type, ibv_type, field) \
	((ibv_type *)((char *)(ex_ptr) + offsetof(ex_type,		\
		field) + sizeof((ex_ptr)->field)))
/*
 * IBV_RESP_TO_VERBS_RESP_EX - same as above with @field fixed to the
 * leading comp_mask member of the extended response.
 */
#define IBV_RESP_TO_VERBS_RESP_EX(ex_ptr, ex_type, ibv_type) \
	IBV_RESP_TO_VERBS_RESP_EX_RAW(ex_ptr, ex_type, ibv_type, comp_mask)
#define IB_USER_VERBS_CMD_COMMAND_MASK 0xff
#define IB_USER_VERBS_CMD_FLAGS_MASK 0xff000000u
#define IB_USER_VERBS_CMD_FLAGS_SHIFT 24
#define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80
struct ib_uverbs_cmd_hdr {
__u32 command;
__u16 in_words;
__u16 out_words;
};
struct ib_uverbs_ex_cmd_hdr {
__u64 response;
__u16 provider_in_words;
__u16 provider_out_words;
__u32 cmd_hdr_reserved;
};
struct ib_uverbs_get_context {
__u64 response;
__u64 driver_data[0];
@ -146,6 +172,11 @@ struct ib_uverbs_query_device {
__u64 driver_data[0];
};
struct ib_uverbs_query_device_ex {
__u64 comp_mask;
__u64 driver_data[0];
};
struct ib_uverbs_query_device_resp {
__u64 fw_ver;
__be64 node_guid;
@ -269,6 +300,22 @@ struct ib_uverbs_dereg_mr {
__u32 mr_handle;
};
struct ib_uverbs_alloc_mw {
__u64 response;
__u32 pd_handle;
__u8 mw_type;
__u8 reserved[3];
};
struct ib_uverbs_alloc_mw_resp {
__u32 mw_handle;
__u32 rkey;
};
struct ib_uverbs_dealloc_mw {
__u32 mw_handle;
};
struct ib_uverbs_create_comp_channel {
__u64 response;
};
@ -292,6 +339,30 @@ struct ib_uverbs_create_cq_resp {
__u32 cqe;
};
/*
 * Bits of ib_uverbs_create_cq_ex.comp_mask.
 *
 * Written with a 1ULL literal, like ib_uverbs_modify_qp_ex_comp_mask, rather
 * than a cast to the kernel-only u64 typedef, so the header does not depend
 * on a type that is unavailable to user-space consumers.  The value (1) is
 * unchanged.
 *
 * NOTE(review): CAP_FLAGS presumably marks create_flags as valid -- confirm
 * against the command handler.
 */
enum ib_uverbs_create_cq_ex_comp_mask {
	IB_UVERBS_CREATE_CQ_EX_CAP_FLAGS = 1ULL << 0,
};
struct ib_uverbs_create_cq_ex {
__u64 comp_mask;
__u64 user_handle;
__u32 cqe;
__u32 comp_vector;
__s32 comp_channel;
__u32 reserved;
__u64 create_flags;
__u64 driver_data[0];
};
struct ib_uverbs_modify_cq_ex {
__u64 comp_mask;
__u32 cq_handle;
__u32 attr_mask;
__u16 cq_count;
__u16 cq_period;
__u32 cq_cap_flags;
};
struct ib_uverbs_resize_cq {
__u64 response;
__u32 cq_handle;
@ -543,6 +614,42 @@ struct ib_uverbs_modify_qp {
__u64 driver_data[0];
};
enum ib_uverbs_modify_qp_ex_comp_mask {
IB_UVERBS_QP_ATTR_DCT_KEY = 1ULL << 0,
};
struct ib_uverbs_modify_qp_ex {
__u32 comp_mask;
struct ib_uverbs_qp_dest dest;
struct ib_uverbs_qp_dest alt_dest;
__u32 qp_handle;
__u32 attr_mask;
__u32 qkey;
__u32 rq_psn;
__u32 sq_psn;
__u32 dest_qp_num;
__u32 qp_access_flags;
__u16 pkey_index;
__u16 alt_pkey_index;
__u8 qp_state;
__u8 cur_qp_state;
__u8 path_mtu;
__u8 path_mig_state;
__u8 en_sqd_async_notify;
__u8 max_rd_atomic;
__u8 max_dest_rd_atomic;
__u8 min_rnr_timer;
__u8 port_num;
__u8 timeout;
__u8 retry_cnt;
__u8 rnr_retry;
__u8 alt_port_num;
__u8 alt_timeout;
__u8 reserved[2];
__u64 dct_key;
__u64 driver_data[0];
};
struct ib_uverbs_modify_qp_resp {
};
@ -599,16 +706,6 @@ struct ib_uverbs_send_wr {
} wr;
};
struct ibv_uverbs_flow_spec {
__u32 type;
__be32 src_ip;
__be32 dst_ip;
__be16 src_port;
__be16 dst_port;
__u8 l4_protocol;
__u8 block_mc_loopback;
};
struct ib_uverbs_post_send {
__u64 response;
__u32 qp_handle;
@ -686,43 +783,117 @@ struct ib_uverbs_detach_mcast {
__u64 driver_data[0];
};
struct ibv_kern_flow_spec {
struct ib_uverbs_flow_spec_hdr {
__u32 type;
__u32 reserved1;
__u16 size;
__u16 reserved;
/* followed by flow_spec */
__u64 flow_spec_data[0];
};
struct ib_kern_eth_filter {
__u8 dst_mac[6];
__u8 src_mac[6];
__be16 ether_type;
__be16 vlan_tag;
};
struct ib_uverbs_flow_spec_eth {
union {
struct ib_uverbs_flow_spec_hdr hdr;
struct {
__be16 ethertype;
__be16 vlan;
__u8 vlan_present;
__u8 mac[6];
__u8 port;
} eth;
__u32 type;
__u16 size;
__u16 reserved;
};
};
struct ib_kern_eth_filter val;
struct ib_kern_eth_filter mask;
};
struct ib_kern_ib_filter {
__be32 l3_type_qpn;
__u8 dst_gid[16];
};
struct ib_uverbs_flow_spec_ib {
union {
struct ib_uverbs_flow_spec_hdr hdr;
struct {
__be32 qpn;
} ib_uc;
struct {
__u8 mgid[16];
} ib_mc;
} l2_id;
__u32 type;
__u16 size;
__u16 reserved;
};
};
struct ib_kern_ib_filter val;
struct ib_kern_ib_filter mask;
};
struct ib_kern_ipv4_filter {
__be32 src_ip;
__be32 dst_ip;
__be16 src_port;
};
struct ib_uverbs_flow_spec_ipv4 {
union {
struct ib_uverbs_flow_spec_hdr hdr;
struct {
__u32 type;
__u16 size;
__u16 reserved;
};
};
struct ib_kern_ipv4_filter val;
struct ib_kern_ipv4_filter mask;
};
struct ib_kern_tcp_udp_filter {
__be16 dst_port;
__u8 l4_protocol;
__u8 block_mc_loopback;
__be16 src_port;
};
struct ib_uverbs_flow_spec_tcp_udp {
union {
struct ib_uverbs_flow_spec_hdr hdr;
struct {
__u32 type;
__u16 size;
__u16 reserved;
};
};
struct ib_kern_tcp_udp_filter val;
struct ib_kern_tcp_udp_filter mask;
};
struct ib_uverbs_flow_attr {
__u32 type;
__u16 size;
__u16 priority;
__u8 num_of_specs;
__u8 reserved[2];
__u8 port;
__u32 flags;
/* Following are the optional layers according to user request
* struct ib_flow_spec_xxx
* struct ib_flow_spec_yyy
*/
struct ib_uverbs_flow_spec_hdr flow_specs[0];
};
struct ib_uverbs_attach_flow {
struct ib_uverbs_create_flow {
__u32 comp_mask;
__u32 qp_handle;
__u32 priority;
struct ibv_kern_flow_spec spec;
struct ib_uverbs_flow_attr flow_attr;
};
struct ib_uverbs_detach_flow {
__u32 qp_handle;
__u32 priority;
struct ibv_kern_flow_spec spec;
struct ib_uverbs_create_flow_resp {
__u32 comp_mask;
__u32 flow_handle;
};
struct ib_uverbs_destroy_flow {
__u32 comp_mask;
__u32 flow_handle;
};
struct ib_uverbs_create_srq {
@ -788,95 +959,22 @@ struct ib_uverbs_destroy_srq_resp {
__u32 events_reported;
};
struct ib_uverbs_open_xrc_domain {
/*
* Legacy extended verbs related structures
*/
struct ib_uverbs_ex_cmd_hdr_legacy {
__u32 command;
__u16 in_words;
__u16 out_words;
__u16 provider_in_words;
__u16 provider_out_words;
__u32 cmd_hdr_reserved;
};
struct ib_uverbs_ex_cmd_resp1_legacy {
__u64 comp_mask;
__u64 response;
__u32 fd;
__u32 oflags;
__u64 driver_data[0];
};
struct ib_uverbs_open_xrc_domain_resp {
__u32 xrcd_handle;
};
struct ib_uverbs_close_xrc_domain {
__u64 response;
__u32 xrcd_handle;
__u32 reserved;
__u64 driver_data[0];
};
struct ib_uverbs_create_xrc_rcv_qp {
__u64 response;
__u64 user_handle;
__u32 xrc_domain_handle;
__u32 max_send_wr;
__u32 max_recv_wr;
__u32 max_send_sge;
__u32 max_recv_sge;
__u32 max_inline_data;
__u8 sq_sig_all;
__u8 qp_type;
__u8 reserved[6];
__u64 driver_data[0];
};
struct ib_uverbs_create_xrc_rcv_qp_resp {
__u32 qpn;
__u32 reserved;
};
struct ib_uverbs_modify_xrc_rcv_qp {
__u32 xrc_domain_handle;
__u32 qp_num;
struct ib_uverbs_qp_dest dest;
struct ib_uverbs_qp_dest alt_dest;
__u32 attr_mask;
__u32 qkey;
__u32 rq_psn;
__u32 sq_psn;
__u32 dest_qp_num;
__u32 qp_access_flags;
__u16 pkey_index;
__u16 alt_pkey_index;
__u8 qp_state;
__u8 cur_qp_state;
__u8 path_mtu;
__u8 path_mig_state;
__u8 en_sqd_async_notify;
__u8 max_rd_atomic;
__u8 max_dest_rd_atomic;
__u8 min_rnr_timer;
__u8 port_num;
__u8 timeout;
__u8 retry_cnt;
__u8 rnr_retry;
__u8 alt_port_num;
__u8 alt_timeout;
__u8 reserved[6];
__u64 driver_data[0];
};
struct ib_uverbs_query_xrc_rcv_qp {
__u64 response;
__u32 xrc_domain_handle;
__u32 qp_num;
__u32 attr_mask;
__u32 reserved;
__u64 driver_data[0];
};
struct ib_uverbs_reg_xrc_rcv_qp {
__u32 xrc_domain_handle;
__u32 qp_num;
__u64 driver_data[0];
};
struct ib_uverbs_unreg_xrc_rcv_qp {
__u32 xrc_domain_handle;
__u32 qp_num;
__u64 driver_data[0];
};
#endif /* IB_USER_VERBS_H */

View File

@ -0,0 +1,204 @@
/*
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
* Copyright (c) 2006 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IB_USER_VERBS_EXP_H
#define IB_USER_VERBS_EXP_H
#include <rdma/ib_user_verbs.h>
/* Base command number (64) of the experimental verbs command range. */
enum {
IB_USER_VERBS_EXP_CMD_FIRST = 64
};
/*
 * Experimental verbs commands, numbered consecutively from 0.
 * NOTE(review): presumably these are offsets relative to
 * IB_USER_VERBS_EXP_CMD_FIRST rather than absolute command numbers --
 * confirm against the command dispatcher.  The order is ABI: append new
 * entries at the end only.
 */
enum {
IB_USER_VERBS_EXP_CMD_CREATE_QP,
IB_USER_VERBS_EXP_CMD_MODIFY_CQ,
IB_USER_VERBS_EXP_CMD_MODIFY_QP,
IB_USER_VERBS_EXP_CMD_CREATE_CQ,
IB_USER_VERBS_EXP_CMD_QUERY_DEVICE,
IB_USER_VERBS_EXP_CMD_CREATE_DCT,
IB_USER_VERBS_EXP_CMD_DESTROY_DCT,
IB_USER_VERBS_EXP_CMD_QUERY_DCT,
};
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* Specifically:
* - Do not use pointer types -- pass pointers in __u64 instead.
* - Make sure that any structure larger than 4 bytes is padded to a
* multiple of 8 bytes. Otherwise the structure size will be
* different between 32-bit and 64-bit architectures.
*/
/*
 * Bits of ib_uverbs_exp_create_qp.comp_mask, one per optional part of the
 * command (values 0x1, 0x2, 0x4).
 */
enum ib_uverbs_exp_create_qp_comp_mask {
	IB_UVERBS_EXP_CREATE_QP_CAP_FLAGS	= 0x1ULL,
	IB_UVERBS_EXP_CREATE_QP_INL_RECV	= 0x2ULL,
	IB_UVERBS_EXP_CREATE_QP_QPG		= 0x4ULL
};
struct ib_uverbs_qpg_init_attrib {
__u32 tss_child_count;
__u32 rss_child_count;
};
struct ib_uverbs_qpg {
__u32 qpg_type;
union {
struct {
__u32 parent_handle;
__u32 reserved;
};
struct ib_uverbs_qpg_init_attrib parent_attrib;
};
__u32 reserved2;
};
struct ib_uverbs_exp_create_qp {
__u64 comp_mask;
__u64 user_handle;
__u32 pd_handle;
__u32 send_cq_handle;
__u32 recv_cq_handle;
__u32 srq_handle;
__u32 max_send_wr;
__u32 max_recv_wr;
__u32 max_send_sge;
__u32 max_recv_sge;
__u32 max_inline_data;
__u8 sq_sig_all;
__u8 qp_type;
__u8 is_srq;
__u8 reserved;
__u64 qp_cap_flags;
__u32 max_inl_recv;
__u32 reserved1;
struct ib_uverbs_qpg qpg;
__u64 driver_data[0];
};
enum ib_uverbs_exp_create_qp_resp_comp_mask {
IB_UVERBS_EXP_CREATE_QP_RESP_INL_RECV = (1ULL << 0),
};
struct ib_uverbs_exp_create_qp_resp {
__u64 comp_mask;
__u32 qp_handle;
__u32 qpn;
__u32 max_send_wr;
__u32 max_recv_wr;
__u32 max_send_sge;
__u32 max_recv_sge;
__u32 max_inline_data;
__u32 max_inl_recv;
};
/*
 * Command payload for IB_USER_VERBS_EXP_CMD_CREATE_DCT.
 *
 * NOTE(review): this layout does not follow the packing rule stated at the
 * top of this file.  Five __u32 members precede dc_key, so dc_key would
 * start at byte 36; ABIs that align __u64 to 8 bytes insert 4 bytes of
 * implicit padding there (dc_key at 40, driver_data at 64), while ABIs that
 * align __u64 to 4 bytes (e.g. i386) do not (dc_key at 36, driver_data at
 * 56).  A 32-bit process on a 64-bit kernel would therefore disagree on
 * every member from dc_key onwards.  The layout is ABI shared with user
 * space, so it is left untouched here; fixing it needs a coordinated
 * change (explicit __u32 reserved member before dc_key).
 */
struct ib_uverbs_create_dct {
__u64 comp_mask;
__u64 user_handle;
__u32 pd_handle;
__u32 cq_handle;
__u32 srq_handle;
__u32 access_flags;
__u32 flow_label;
/* see NOTE above: implicit padding may precede this member */
__u64 dc_key;
__u8 min_rnr_timer;
__u8 tclass;
__u8 port;
__u8 pkey_index;
__u8 gid_index;
__u8 hop_limit;
__u8 mtu;
__u8 rsvd;
__u32 create_flags;
/* zero-length trailer for provider-specific data */
__u64 driver_data[0];
};
struct ib_uverbs_create_dct_resp {
__u32 dct_handle;
__u32 dctn;
};
struct ib_uverbs_destroy_dct {
__u64 comp_mask;
__u64 user_handle;
};
struct ib_uverbs_destroy_dct_resp {
__u64 reserved;
};
struct ib_uverbs_query_dct {
__u64 comp_mask;
__u64 dct_handle;
__u64 driver_data[0];
};
struct ib_uverbs_query_dct_resp {
__u64 dc_key;
__u32 access_flags;
__u32 flow_label;
__u32 key_violations;
__u8 port;
__u8 min_rnr_timer;
__u8 tclass;
__u8 mtu;
__u8 pkey_index;
__u8 gid_index;
__u8 hop_limit;
__u8 state;
__u32 rsvd;
__u64 driver_data[0];
};
struct ib_uverbs_exp_query_device {
__u64 comp_mask;
__u64 driver_data[0];
};
struct ib_uverbs_exp_query_device_resp {
__u64 comp_mask;
struct ib_uverbs_query_device_resp base;
__u64 timestamp_mask;
__u64 hca_core_clock;
__u64 device_cap_flags2;
__u32 dc_rd_req;
__u32 dc_rd_res;
__u32 inline_recv_sz;
__u32 max_rss_tbl_sz;
};
#endif /* IB_USER_VERBS_EXP_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,100 @@
/*
* Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2004 Infinicon Corporation. All rights reserved.
* Copyright (c) 2004 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef IB_VERBS_EXP_H
#define IB_VERBS_EXP_H
#include <rdma/ib_verbs.h>
/*
 * Single-bit capability flags occupying bits 0..3.
 * NOTE(review): presumably reported through the device_cap_flags2 member
 * of struct ib_exp_device_attr - confirm against the drivers that set it.
 */
enum ib_exp_device_cap_flags2 {
	IB_EXP_DEVICE_DC_TRANSPORT	= 0x01,	/* bit 0 */
	IB_EXP_DEVICE_QPG		= 0x02,	/* bit 1 */
	IB_EXP_DEVICE_UD_RSS		= 0x04,	/* bit 2 */
	IB_EXP_DEVICE_UD_TSS		= 0x08,	/* bit 3 */
};
/*
 * Single-bit flags occupying bits 1..7; bit 0 is not assigned here.
 * The comment inside struct ib_exp_device_attr refers to these
 * IB_EXP_DEVICE_ATTR_* names for its exp_comp_mask member.
 */
enum ib_exp_device_attr_comp_mask {
	IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK	= 0x02ULL,	/* bit 1 */
	IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK	= 0x04ULL,	/* bit 2 */
	IB_EXP_DEVICE_ATTR_CAP_FLAGS2		= 0x08ULL,	/* bit 3 */
	IB_EXP_DEVICE_ATTR_DC_REQ_RD		= 0x10ULL,	/* bit 4 */
	IB_EXP_DEVICE_ATTR_DC_RES_RD		= 0x20ULL,	/* bit 5 */
	IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ	= 0x40ULL,	/* bit 6 */
	IB_EXP_DEVICE_ATTR_RSS_TBL_SZ		= 0x80ULL,	/* bit 7 */
};
/*
 * Extended device attributes: the regular struct ib_device_attr (declared
 * in <rdma/ib_verbs.h>, not visible here) as `base`, followed by the
 * experimental additions.
 *
 * NOTE(review): exp_comp_mask is 32 bits wide while the
 * IB_EXP_DEVICE_ATTR_* enumerators are written as 1ULL shifts; the highest
 * bit defined in this file is bit 7, so they currently fit.
 * NOTE(review): IB_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK and
 * IB_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK have no correspondingly named
 * member in this struct - confirm where those values are carried.
 */
struct ib_exp_device_attr {
struct ib_device_attr base;
/* Use IB_EXP_DEVICE_ATTR_... for exp_comp_mask */
uint32_t exp_comp_mask;
uint64_t device_cap_flags2;
uint32_t dc_rd_req;
uint32_t dc_rd_res;
uint32_t inline_recv_sz;
uint32_t max_rss_tbl_sz;
};
/*
 * Initialisation attributes for creating a QP through the experimental
 * interface (going by the struct name). All referenced ib_* types are
 * declared outside this part of the file.
 * NOTE(review): presumably mirrors struct ib_qp_init_attr with
 * max_inl_recv added - confirm against <rdma/ib_verbs.h>.
 */
struct ib_exp_qp_init_attr {
/* Callback taking an event pointer and an untyped context pointer. */
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
struct ib_cq *send_cq;
struct ib_cq *recv_cq;
struct ib_srq *srq;
struct ib_xrcd *xrcd; /* XRC TGT QPs only */
struct ib_qp_cap cap;
/*
 * Anonymous union: only one of the two members is meaningful at a time;
 * per the comment below, qpg_type decides which.
 */
union {
struct ib_qp *qpg_parent; /* see qpg_type */
struct ib_qpg_init_attrib parent_attrib;
};
enum ib_sig_type sq_sig_type;
enum ib_qp_type qp_type;
enum ib_qp_create_flags create_flags;
enum ib_qpg_type qpg_type;
u8 port_num; /* special QP types only */
u32 max_inl_recv;
};
/*
 * ib_exp_query_device - query a device using the extended attribute struct.
 * @device: device to query.
 * @device_attr: extended attribute structure; NOTE(review): presumably
 *	filled in by the call - the implementation is not visible here.
 *
 * NOTE(review): the meaning of the int return value is not documented in
 * this header; confirm against the implementation.
 */
int ib_exp_query_device(struct ib_device *device,
struct ib_exp_device_attr *device_attr);
#endif /* IB_VERBS_EXP_H */

View File

@ -46,24 +46,17 @@ enum iw_cm_event_type {
IW_CM_EVENT_CLOSE /* close complete */
};
/*
 * Status codes for iw_cm events.
 * IW_CM_EVENT_STATUS_OK and IW_CM_EVENT_STATUS_ACCEPTED share the value 0;
 * the remaining codes count up from 1.
 */
enum iw_cm_event_status {
	IW_CM_EVENT_STATUS_OK		= 0,	/* request successful */
	IW_CM_EVENT_STATUS_ACCEPTED	= 0,	/* connect request accepted */
	IW_CM_EVENT_STATUS_REJECTED	= 1,	/* connect request rejected */
	IW_CM_EVENT_STATUS_TIMEOUT	= 2,	/* the operation timed out */
	IW_CM_EVENT_STATUS_RESET	= 3,	/* reset from remote peer */
	IW_CM_EVENT_STATUS_EINVAL	= 4,	/* asynchronous failure for bad parm */
};
/*
 * Event record for the iw_cm interface (going by the struct name; the
 * code that produces and consumes it is not visible here).
 *
 * NOTE(review): `status` and `private_data_len` are each declared twice
 * below, which is not valid C. It looks like the removed and the added
 * lines of a change are shown together - confirm which declarations are
 * meant to remain before using this definition.
 */
struct iw_cm_event {
enum iw_cm_event_type event;
enum iw_cm_event_status status;
int status;
struct sockaddr_in local_addr;
struct sockaddr_in remote_addr;
void *private_data;
u8 private_data_len;
void *provider_data;
u8 private_data_len;
struct socket *so;
u8 ord;
u8 ird;
};
/**

View File

@ -0,0 +1,73 @@
/*
* Copyright (c) 2013, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if !defined(PEER_MEM_H)
#define PEER_MEM_H
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>
#include <linux/mutex.h>
/* Sizes of the name[] and version[] char arrays in struct peer_memory_client. */
#define IB_PEER_MEMORY_NAME_MAX 64
#define IB_PEER_MEMORY_VER_MAX 16
/*
 * Callback table describing a peer memory client; a pointer to it is what
 * ib_register_peer_memory_client() takes as its first argument.
 *
 * acquire() receives a void **client_context, while every other callback
 * receives a plain void *client_context.
 * NOTE(review): presumably acquire() stores a per-mapping context there
 * that is handed back to the later callbacks - confirm in the core code.
 * NOTE(review): the return conventions of get_pages/dma_map/dma_unmap are
 * not documented in this header.
 */
struct peer_memory_client {
/* Fixed-size character arrays identifying the client. */
char name[IB_PEER_MEMORY_NAME_MAX];
char version[IB_PEER_MEMORY_VER_MAX];
/* acquire() return code: 1 - the range is mine, 0 - not mine */
int (*acquire) (unsigned long addr, size_t size, void *peer_mem_private_data,
char *peer_mem_name, void **client_context);
int (*get_pages) (unsigned long addr,
size_t size, int write, int force,
struct sg_table *sg_head,
void *client_context, void *core_context);
/* dma_map() additionally takes an int *nmap argument; dma_unmap() does not. */
int (*dma_map) (struct sg_table *sg_head, void *client_context,
struct device *dma_device, int dmasync, int *nmap);
int (*dma_unmap) (struct sg_table *sg_head, void *client_context,
struct device *dma_device);
void (*put_pages) (struct sg_table *sg_head, void *client_context);
unsigned long (*get_page_size) (void *client_context);
void (*release) (void *client_context);
};
/*
 * Function-pointer type for an invalidation callback. It takes a
 * registration handle and a core context and returns int.
 * NOTE(review): presumably reg_handle is the value returned by
 * ib_register_peer_memory_client() and core_context the value passed to
 * get_pages() - confirm in the core code.
 */
typedef int (*invalidate_peer_memory)(void *reg_handle,
void *core_context);
/*
 * ib_register_peer_memory_client - register a peer memory client.
 * @peer_client: callback table describing the client.
 * @invalidate_callback: pointer to an invalidate_peer_memory function
 *	pointer. NOTE(review): presumably an out-parameter through which
 *	the core hands back its invalidation entry point - confirm.
 *
 * Returns an untyped pointer. NOTE(review): presumably the reg_handle to
 * pass to ib_unregister_peer_memory_client(), with NULL on failure -
 * confirm against the implementation.
 */
void *ib_register_peer_memory_client(struct peer_memory_client *peer_client,
invalidate_peer_memory *invalidate_callback);
/*
 * ib_unregister_peer_memory_client - unregister a peer memory client.
 * @reg_handle: registration handle. NOTE(review): presumably the value
 *	returned by ib_register_peer_memory_client() - confirm.
 */
void ib_unregister_peer_memory_client(void *reg_handle);
#endif

View File

@ -59,15 +59,26 @@ enum rdma_cm_event_type {
RDMA_CM_EVENT_MULTICAST_JOIN,
RDMA_CM_EVENT_MULTICAST_ERROR,
RDMA_CM_EVENT_ADDR_CHANGE,
RDMA_CM_EVENT_TIMEWAIT_EXIT
RDMA_CM_EVENT_TIMEWAIT_EXIT,
RDMA_CM_EVENT_ALT_ROUTE_RESOLVED,
RDMA_CM_EVENT_ALT_ROUTE_ERROR,
RDMA_CM_EVENT_LOAD_ALT_PATH,
RDMA_CM_EVENT_ALT_PATH_LOADED,
};
/*
 * RDMA port spaces. Every enumerator has an explicitly assigned value.
 * NOTE(review): the origin of the numeric values is not documented here;
 * do not renumber them.
 */
enum rdma_port_space {
	RDMA_PS_SDP	= 0x0001,
	RDMA_PS_IPOIB	= 0x0002,
	RDMA_PS_IB	= 0x013F,
	RDMA_PS_TCP	= 0x0106,
	RDMA_PS_UDP	= 0x0111,
	RDMA_PS_SCTP	= 0x0183,
};
/*
 * Alternate path types, as accepted by rdma_enable_apm().
 * Values are consecutive starting at 0.
 */
enum alt_path_type {
	RDMA_ALT_PATH_NONE	= 0,
	RDMA_ALT_PATH_PORT	= 1,
	RDMA_ALT_PATH_LID	= 2,
	RDMA_ALT_PATH_BEST	= 3,
};
struct rdma_addr {
@ -101,6 +112,7 @@ struct rdma_ud_param {
struct ib_ah_attr ah_attr;
u32 qp_num;
u32 qkey;
u8 alt_path_index;
};
struct rdma_cm_event {
@ -112,6 +124,20 @@ struct rdma_cm_event {
} param;
};
/*
 * RDMA CM states. Values are consecutive starting at 0 (RDMA_CM_IDLE).
 * NOTE(review): the transitions between states are not described in this
 * header; see the connection manager implementation.
 */
enum rdma_cm_state {
	RDMA_CM_IDLE		= 0,
	RDMA_CM_ADDR_QUERY	= 1,
	RDMA_CM_ADDR_RESOLVED	= 2,
	RDMA_CM_ROUTE_QUERY	= 3,
	RDMA_CM_ROUTE_RESOLVED	= 4,
	RDMA_CM_CONNECT		= 5,
	RDMA_CM_DISCONNECT	= 6,
	RDMA_CM_ADDR_BOUND	= 7,
	RDMA_CM_LISTEN		= 8,
	RDMA_CM_DEVICE_REMOVAL	= 9,
	RDMA_CM_DESTROYING	= 10,
};
struct rdma_cm_id;
/**
@ -131,7 +157,9 @@ struct rdma_cm_id {
rdma_cm_event_handler event_handler;
struct rdma_route route;
enum rdma_port_space ps;
enum ib_qp_type qp_type;
u8 port_num;
void *ucontext;
};
/**
@ -141,9 +169,11 @@ struct rdma_cm_id {
* returned rdma_id.
* @context: User specified context associated with the id.
* @ps: RDMA port space.
* @qp_type: type of queue pair associated with the id.
*/
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
void *context, enum rdma_port_space ps);
void *context, enum rdma_port_space ps,
enum ib_qp_type qp_type);
/**
* rdma_destroy_id - Destroys an RDMA identifier.
@ -191,6 +221,19 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
*/
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
/**
 * rdma_enable_apm - Get ready to use APM for the given ID.
 * @id: RDMA identifier.
 * @alt_type: Alternate path type to resolve.
 *
 * Actual alternate path discovery and load will take place only
 * after a connection has been established.
 *
 * Calling this function only has an effect on the connection's client side.
 * It should be called after rdma_resolve_route() and before rdma_connect().
 *
 * NOTE(review): the meaning of the int return value is not documented
 * here; confirm against the implementation.
 */
int rdma_enable_apm(struct rdma_cm_id *id, enum alt_path_type alt_type);
/**
* rdma_create_qp - Allocate a QP and associate it with the specified RDMA
* identifier.
@ -330,4 +373,32 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
*/
void rdma_set_service_type(struct rdma_cm_id *id, int tos);
/**
 * rdma_set_reuseaddr - Allow the reuse of local addresses when binding
 *    the rdma_cm_id.
 * @id: Communication identifier to configure.
 * @reuse: Value indicating if the bound address is reusable.
 *
 * Reuse must be set before an address is bound to the id.
 *
 * NOTE(review): the meaning of the int return value is not documented
 * here; confirm against the implementation.
 */
int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
/**
 * rdma_set_afonly - Specify that listens are restricted to the
 *    bound address family only.
 * @id: Communication identifier to configure.
 * @afonly: Value indicating if listens are restricted.
 *
 * Must be set before the identifier is in the listening state.
 *
 * NOTE(review): the meaning of the int return value is not documented
 * here; confirm against the implementation.
 */
int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
/**
 * rdma_set_timeout - Set the QP timeout associated with a connection
 *    identifier.
 * @id: Communication identifier whose QP timeout is being set.
 * @timeout: QP timeout. NOTE(review): units and encoding are not
 *    documented here; confirm against the implementation.
 */
void rdma_set_timeout(struct rdma_cm_id *id, int timeout);
#endif /* RDMA_CM_H */

View File

@ -77,7 +77,8 @@ struct rdma_ucm_create_id {
__u64 uid;
__u64 response;
__u16 ps;
__u8 reserved[6];
__u8 qp_type;
__u8 reserved[5];
};
struct rdma_ucm_create_id_resp {
@ -222,7 +223,11 @@ enum {
/*
 * Option details.
 *
 * The RDMA_OPTION_ID_* and RDMA_OPTION_IB_* names reuse the same numeric
 * values (1 and 2). NOTE(review): presumably they are told apart by a
 * separate option-level field - confirm against the code that handles
 * struct rdma_ucm_set_option.
 *
 * NOTE(review): RDMA_OPTION_IB_PATH appears twice below and its first
 * occurrence has no trailing comma, which is not valid C. It looks like
 * the removed and the added lines of a change are shown together -
 * confirm which lines are meant to remain.
 */
enum {
RDMA_OPTION_ID_TOS = 0,
RDMA_OPTION_IB_PATH = 1
RDMA_OPTION_ID_REUSEADDR = 1,
RDMA_OPTION_ID_AFONLY = 2,
RDMA_OPTION_IB_PATH = 1,
RDMA_OPTION_IB_APM = 2,
};
struct rdma_ucm_set_option {