Update OFED to Linux 3.7 and update Mellanox drivers.

Update the OFED Infiniband core to the version supplied in Linux
version 3.7.

The update to OFED is nearly all additional defines and functions
with the exception of the addition of additional parameters to
ib_register_device() and the reg_user_mr callback.

In addition the ibcore (Infiniband core) and ipoib (IP over Infiniband)
have both been made into completely loadable modules to facilitate
testing of the OFED stack in FreeBSD.

Finally the Mellanox Infiniband drivers are now updated to the
latest version shipping with Linux 3.7.

Submitted by: Mellanox FreeBSD driver team:
                Oded Shanoon (odeds mellanox.com),
                Meny Yossefi (menyy mellanox.com),
                Orit Moskovich (oritm mellanox.com)

Approved by: re
This commit is contained in:
Alfred Perlstein 2013-09-29 00:35:03 +00:00
parent 64fa5e4ddf
commit c9f432b7ba
104 changed files with 25236 additions and 3676 deletions

View File

@ -3542,6 +3542,18 @@ ofed/drivers/infiniband/ulp/sdp/sdp_tx.c optional sdp inet \
no-depend \
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
ofed/drivers/infiniband/hw/mlx4/alias_GUID.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/mcg.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/sysfs.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/cm.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
ofed/drivers/infiniband/hw/mlx4/ah.c optional mlx4ib \
no-depend obj-prefix "mlx4ib_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
@ -3624,7 +3636,10 @@ ofed/drivers/net/mlx4/sense.c optional mlx4ib | mlxen \
ofed/drivers/net/mlx4/srq.c optional mlx4ib | mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
ofed/drivers/net/mlx4/xrcd.c optional mlx4ib | mlxen \
ofed/drivers/net/mlx4/resource_tracker.c optional mlx4ib | mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
ofed/drivers/net/mlx4/sys_tune.c optional mlx4ib | mlxen \
no-depend obj-prefix "mlx4_" \
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"

View File

@ -28,21 +28,17 @@
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $FreeBSD$
*/
#ifndef IB_UMEM_H
#define IB_UMEM_H
struct ib_ucontext;
#include <linux/list.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/dma-attrs.h>
struct ib_umem_chunk {
TAILQ_ENTRY(ib_umem_chunk) entry;
int nents;
int nmap;
struct rdma_scatterlist page_list[0];
};
struct ib_ucontext;
struct ib_umem {
struct ib_ucontext *context;
@ -50,28 +46,76 @@ struct ib_umem {
int offset;
int page_size;
int writable;
TAILQ_HEAD(, ib_umem_chunk) chunk_list;
#ifdef notyet
int hugetlb;
struct list_head chunk_list;
struct work_struct work;
struct mm_struct *mm;
#endif
unsigned long diff;
};
/* contiguous memory structure */
struct ib_cmem {
struct ib_ucontext *context;
size_t length;
/* Link list of contiguous blocks being part of that cmem */
struct list_head ib_cmem_block;
/* Order of cmem block, 2^ block_order will equal number
of physical pages per block
*/
unsigned long block_order;
/* Refernce counter for that memory area
- When value became 0 pages will be returned to the kernel.
*/
struct kref refcount;
};
struct ib_cmem_block {
struct list_head list;
/* page will point to the page struct of the head page
in the current compound page.
block order is saved once as part of ib_cmem.
*/
struct page *page;
};
struct ib_umem_chunk {
struct list_head list;
int nents;
int nmap;
struct dma_attrs attrs;
struct scatterlist page_list[0];
};
#ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
size_t size, int access);
size_t size, int access, int dmasync);
void ib_umem_release(struct ib_umem *umem);
int ib_umem_page_count(struct ib_umem *umem);
int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
struct vm_area_struct *vma);
struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
unsigned long total_size,
unsigned long page_size_order);
void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem);
int ib_umem_map_to_vma(struct ib_umem *umem,
struct vm_area_struct *vma);
#else /* CONFIG_INFINIBAND_USER_MEM */
#include <linux/err.h>
static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
unsigned long addr, size_t size,
int access) {
return ERR_PTR(EINVAL);
int access, int dmasync) {
return ERR_PTR(-EINVAL);
}
static inline void ib_umem_release(struct ib_umem *umem) { }
static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }

View File

@ -541,7 +541,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
u64 virt, int acc, struct ib_udata *udata,
int mr_id)
{
__be64 *pages;
int shift, i, n;
@ -1136,7 +1137,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;
ret = ib_register_device(&dev->ibdev);
ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto bail1;

View File

@ -128,6 +128,7 @@ SUBDIR= \
hwpmc \
${_hyperv} \
${_i2c} \
${_ibcore} \
${_ibcs2} \
${_ichwd} \
${_ida} \
@ -149,6 +150,7 @@ SUBDIR= \
${_igb} \
${_iir} \
${_io} \
${_ipoib} \
${_ipdivert} \
${_ipfilter} \
${_ipfw} \
@ -499,11 +501,17 @@ _fe= fe
_glxiic= glxiic
_glxsb= glxsb
_i2c= i2c
.if ${MK_OFED} != "no" || defined(ALL_MODULES)
_ibcore= ibcore
.endif
_ibcs2= ibcs2
_ie= ie
_if_ndis= if_ndis
_igb= igb
_io= io
.if ${MK_OFED} != "no" || defined(ALL_MODULES)
_ipoib= ipoib
.endif
_lindev= lindev
_linprocfs= linprocfs
_linsysfs= linsysfs
@ -675,6 +683,9 @@ _hptrr= hptrr
.endif
_hyperv= hyperv
_i2c= i2c
.if ${MK_OFED} != "no" || defined(ALL_MODULES)
_ibcore= ibcore
.endif
_ichwd= ichwd
_ida= ida
_if_ndis= if_ndis
@ -682,6 +693,9 @@ _igb= igb
_iir= iir
_io= io
_ipmi= ipmi
.if ${MK_OFED} != "no" || defined(ALL_MODULES)
_ipoib= ipoib
.endif
_ips= ips
_ipw= ipw
.if ${MK_SOURCELESS_UCODE} != "no"

View File

@ -0,0 +1,23 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/core
.PATH: ${.CURDIR}/../../ofed/include/linux
.include <bsd.own.mk>
KMOD = ibcore
SRCS = addr.c cm_msgs.h iwcm.c mad_rmpp.h sa_query.c ucma.c uverbs_cmd.c
SRCS+= agent.c local_sa.c iwcm.h multicast.c smi.c ud_header.c uverbs_main.c
SRCS+= agent.h core_priv.h mad.c notice.c smi.h umem.c uverbs_marshall.c
SRCS+= cache.c device.c mad_priv.h packer.c sysfs.c user_mad.c verbs.c
SRCS+= cm.c fmr_pool.c mad_rmpp.c sa.h ucm.c uverbs.h cma.c
SRCS+= linux_compat.c linux_radix.c linux_idr.c
SRCS+= vnode_if.h device_if.h bus_if.h pci_if.h opt_inet.h
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/core
CFLAGS+= -I${.CURDIR}/../mlx4ib
CFLAGS+= -I${.CURDIR}/../../ofed/include/
CFLAGS+= -DINET6 -DINET -DOFED
.include <bsd.kmod.mk>
CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions

View File

@ -0,0 +1,31 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/ulp/ipoib
.PATH: ${.CURDIR}/../../ofed/include/linux
.include <bsd.own.mk>
KMOD = ipoib
SRCS = device_if.h bus_if.h opt_ofed.h vnode_if.h opt_inet.h opt_inet6.h
SRCS += ipoib_cm.c ipoib_ib.c ipoib_main.c ipoib_multicast.c ipoib_verbs.c ipoib.h
SRCS+= linux_compat.c linux_radix.c linux_idr.c
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/ulp/ipoib
CFLAGS+= -I${.CURDIR}/../ibcore
CFLAGS+= -I${.CURDIR}/../../ofed/include/
CFLAGS+= -DINET6 -DINET -DOFED
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
@echo "#define INET 1" > ${.TARGET}
.endif
.if ${MK_INET6_SUPPORT} != "no"
opt_inet6.h:
@echo "#define INET6 1" > ${.TARGET}
.endif
.endif
.include <bsd.kmod.mk>
CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions

View File

@ -3,11 +3,11 @@
.include <bsd.own.mk>
.PATH: ${.CURDIR}/../../ofed/include/linux
KMOD = mlx4
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c
SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c xrcd.c
SRCS+= opt_inet.h opt_inet6.h
SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c linux_compat.c linux_radix.c linux_idr.c
SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c resource_tracker.c sys_tune.c
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4
CFLAGS+= -I${.CURDIR}/../../ofed/include/

View File

@ -1,14 +1,21 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
.PATH: ${.CURDIR}/../../ofed/include/linux
.include <bsd.own.mk>
KMOD = mlx4ib
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
SRCS+= linux_compat.c linux_radix.c linux_idr.c
SRCS+= alias_GUID.c mcg.c sysfs.c ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c cm.c
SRCS+= opt_inet.h opt_inet6.h
#CFLAGS+= -I${.CURDIR}/../../ofed/include/
#CFLAGS+= -I${.CURDIR}/../../../../include
CFLAGS+= -I${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
CFLAGS+= -I${.CURDIR}/../../ofed/include/
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
CFLAGS+= -DINET6 -DINET -DOFED
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"

View File

@ -356,7 +356,7 @@ static int addr_resolve(struct sockaddr *src_in,
u_char edst[MAX_ADDR_LEN];
int multi;
int bcast;
int error;
int error = 0;
/*
* Determine whether the address is unicast, multicast, or broadcast

View File

@ -2957,7 +2957,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
0xFF10A01B)) {
/* IPv6 address is an SA assigned MGID. */
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else if ((addr->sa_family == AF_INET6)) {
} else if (addr->sa_family == AF_INET6) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */

View File

@ -38,7 +38,8 @@
#include <rdma/ib_verbs.h>
int ib_device_register_sysfs(struct ib_device *device);
int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);

View File

@ -273,7 +273,9 @@ static int read_port_table_lengths(struct ib_device *device)
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device)
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
int ret;
@ -294,8 +296,6 @@ int ib_register_device(struct ib_device *device)
INIT_LIST_HEAD(&device->client_data_list);
spin_lock_init(&device->event_handler_lock);
spin_lock_init(&device->client_data_lock);
device->ib_uverbs_xrcd_table = RB_ROOT;
mutex_init(&device->xrcd_table_mutex);
ret = read_port_table_lengths(device);
if (ret) {
@ -304,7 +304,7 @@ int ib_register_device(struct ib_device *device)
goto out;
}
ret = ib_device_register_sysfs(device);
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
@ -752,3 +752,19 @@ static void __exit ib_core_cleanup(void)
module_init(ib_core_init);
module_exit(ib_core_cleanup);
#undef MODULE_VERSION
#include <sys/module.h>
static int
ibcore_evhand(module_t mod, int event, void *arg)
{
return (0);
}
static moduledata_t ibcore_mod = {
.name = "ibcore",
.evhand = ibcore_evhand,
};
MODULE_VERSION(ibcore, 1);
DECLARE_MODULE(ibcore, ibcore_mod, SI_SUB_SMP, SI_ORDER_ANY);

View File

@ -1105,6 +1105,27 @@ static void ib_sa_inform_release(struct ib_sa_query *sa_query)
kfree(container_of(sa_query, struct ib_sa_inform_query, sa_query));
}
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
{
// stub function -
// called originally from mad.c under mlx4_ib_init_sriov()
// which calls mlx4_ib_init_alias_guid_service() in alias_GUID.c
// which goes down to this function
printk("ERROR: function should be called only in SRIOV flow!!!");
return 0;
}
/**
* ib_sa_informinfo_query - Start an InformInfo registration.
* @client:SA client

View File

@ -38,6 +38,7 @@
#include <linux/string.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
struct ib_port {
struct kobject kobj;
@ -103,7 +104,7 @@ static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
return ret;
return sprintf(buf, "%d: %s\n", attr.state,
attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
attr.state < ARRAY_SIZE(state_name) ?
state_name[attr.state] : "UNKNOWN");
}
@ -292,118 +293,124 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
return sprintf(buf, "0x%04x\n", pkey);
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
static ssize_t get_pma_counters(struct ib_port *p, struct port_attribute *attr,
char *buf, int c_ext)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
struct ib_mad *in_mad = NULL;
struct ib_mad *out_mad = NULL;
ssize_t ret;
if (!p->ibdev->process_mad)
return -ENXIO;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
}
in_mad->mad_hdr.base_version = 1;
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
if (c_ext)
in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS_EXT;
else
in_mad->mad_hdr.attr_id = IB_PMA_PORT_COUNTERS;
in_mad->data[41] = p->port_num; /* PortSelect field */
if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
p->port_num, NULL, NULL, in_mad, out_mad) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
ret = -EINVAL;
goto out;
}
switch (width) {
case 4:
ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
(4 - (offset % 8))) & 0xf);
break;
case 8:
ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
break;
case 16:
ret = sprintf(buf, "%u\n",
be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
break;
case 32:
ret = sprintf(buf, "%u\n",
be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
break;
case 64:
ret = sprintf(buf, "%llu\n", (unsigned long long)
be64_to_cpup((__be64 *)(out_mad->data + 40 + offset / 8)));
break;
default:
ret = 0;
}
out:
kfree(in_mad);
kfree(out_mad);
return ret;
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
}
static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
char *buf)
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
struct ib_mad *in_mad = NULL;
struct ib_mad *out_mad = NULL;
ssize_t ret;
if (!p->ibdev->process_mad)
return sprintf(buf, "N/A (no PMA)\n");
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
}
in_mad->mad_hdr.base_version = 1;
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
in_mad->data[41] = p->port_num; /* PortSelect field */
if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
p->port_num, NULL, NULL, in_mad, out_mad) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
ret = -EINVAL;
goto out;
}
switch (width) {
case 4:
ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
(4 - (offset % 8))) & 0xf);
break;
case 8:
ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
break;
case 16:
ret = sprintf(buf, "%u\n",
be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
break;
case 32:
ret = sprintf(buf, "%u\n",
be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
break;
default:
ret = 0;
}
out:
kfree(in_mad);
kfree(out_mad);
return ret;
return get_pma_counters(p, attr, buf, 0);
}
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
static PORT_PMA_ATTR(link_downed , 2, 8, 56);
static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
static PORT_PMA_ATTR(link_error_recovery , 1, 8, 48);
static PORT_PMA_ATTR(link_downed , 2, 8, 56);
static PORT_PMA_ATTR(port_rcv_errors , 3, 16, 64);
static PORT_PMA_ATTR(port_rcv_remote_physical_errors, 4, 16, 80);
static PORT_PMA_ATTR(port_rcv_switch_relay_errors , 5, 16, 96);
static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_discards , 6, 16, 112);
static PORT_PMA_ATTR(port_xmit_constraint_errors , 7, 8, 128);
static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
static PORT_PMA_ATTR(port_rcv_constraint_errors , 8, 8, 136);
static PORT_PMA_ATTR(local_link_integrity_errors , 9, 4, 152);
static PORT_PMA_ATTR(excessive_buffer_overrun_errors, 10, 4, 156);
static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
/*
* There is no bit allocated for port_xmit_wait in the CounterSelect field
* (IB spec). However, since this bit is ignored when reading
* (show_pma_counter), the _counter field of port_xmit_wait can be set to zero.
*/
static PORT_PMA_ATTR(port_xmit_wait , 0, 32, 320);
static PORT_PMA_ATTR(VL15_dropped , 11, 16, 176);
static PORT_PMA_ATTR(port_xmit_data , 12, 32, 192);
static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
static struct attribute *pma_attrs[] = {
&port_pma_attr_symbol_error.attr.attr,
&port_pma_attr_link_error_recovery.attr.attr,
&port_pma_attr_link_downed.attr.attr,
&port_pma_attr_port_rcv_errors.attr.attr,
&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
&port_pma_attr_port_xmit_discards.attr.attr,
&port_pma_attr_port_xmit_constraint_errors.attr.attr,
&port_pma_attr_port_rcv_constraint_errors.attr.attr,
&port_pma_attr_local_link_integrity_errors.attr.attr,
&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
&port_pma_attr_VL15_dropped.attr.attr,
&port_pma_attr_port_xmit_data.attr.attr,
&port_pma_attr_port_rcv_data.attr.attr,
&port_pma_attr_port_xmit_packets.attr.attr,
&port_pma_attr_port_rcv_packets.attr.attr,
&port_pma_attr_port_xmit_wait.attr.attr,
NULL
&port_pma_attr_symbol_error.attr.attr,
&port_pma_attr_link_error_recovery.attr.attr,
&port_pma_attr_link_downed.attr.attr,
&port_pma_attr_port_rcv_errors.attr.attr,
&port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
&port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
&port_pma_attr_port_xmit_discards.attr.attr,
&port_pma_attr_port_xmit_constraint_errors.attr.attr,
&port_pma_attr_port_rcv_constraint_errors.attr.attr,
&port_pma_attr_local_link_integrity_errors.attr.attr,
&port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
&port_pma_attr_VL15_dropped.attr.attr,
&port_pma_attr_port_xmit_data.attr.attr,
&port_pma_attr_port_rcv_data.attr.attr,
&port_pma_attr_port_xmit_packets.attr.attr,
&port_pma_attr_port_rcv_packets.attr.attr,
NULL
};
static struct attribute_group pma_group = {
@ -411,6 +418,44 @@ static struct attribute_group pma_group = {
.attrs = pma_attrs
};
#define PORT_PMA_ATTR_EXT(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_ext_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter_ext, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
}
static ssize_t show_pma_counter_ext(struct ib_port *p,
struct port_attribute *attr, char *buf)
{
return get_pma_counters(p, attr, buf, 1);
}
static PORT_PMA_ATTR_EXT(port_xmit_data_64 , 0, 64, 64);
static PORT_PMA_ATTR_EXT(port_rcv_data_64 , 0, 64, 128);
static PORT_PMA_ATTR_EXT(port_xmit_packets_64 , 0, 64, 192);
static PORT_PMA_ATTR_EXT(port_rcv_packets_64 , 0, 64, 256);
static PORT_PMA_ATTR_EXT(port_unicast_xmit_packets , 0, 64, 320);
static PORT_PMA_ATTR_EXT(port_unicast_rcv_packets , 0, 64, 384);
static PORT_PMA_ATTR_EXT(port_multicast_xmit_packets , 0, 64, 448);
static PORT_PMA_ATTR_EXT(port_multicast_rcv_packets , 0, 64, 512);
static struct attribute *pma_attrs_ext[] = {
&port_pma_attr_ext_port_xmit_data_64.attr.attr,
&port_pma_attr_ext_port_rcv_data_64.attr.attr,
&port_pma_attr_ext_port_xmit_packets_64.attr.attr,
&port_pma_attr_ext_port_rcv_packets_64.attr.attr,
&port_pma_attr_ext_port_unicast_xmit_packets.attr.attr,
&port_pma_attr_ext_port_unicast_rcv_packets.attr.attr,
&port_pma_attr_ext_port_multicast_xmit_packets.attr.attr,
&port_pma_attr_ext_port_multicast_rcv_packets.attr.attr,
NULL
};
static struct attribute_group pma_ext_group = {
.name = "counters_ext",
.attrs = pma_attrs_ext
};
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@ -503,7 +548,9 @@ alloc_group_attrs(ssize_t (*show)(struct ib_port *,
return NULL;
}
static int add_port(struct ib_device *device, int port_num)
static int add_port(struct ib_device *device, int port_num,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{
struct ib_port *p;
struct ib_port_attr attr;
@ -522,7 +569,7 @@ static int add_port(struct ib_device *device, int port_num)
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
device->ports_parent,
kobject_get(device->ports_parent),
"%d", port_num);
if (ret)
goto err_put;
@ -531,10 +578,14 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
goto err_put;
ret = sysfs_create_group(&p->kobj, &pma_ext_group);
if (ret)
goto err_remove_pma;
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
if (!p->gid_group.attrs)
goto err_remove_pma;
goto err_remove_pma_ext;
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
@ -550,6 +601,12 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
goto err_free_pkey;
if (port_callback) {
ret = port_callback(device, port_num, &p->kobj);
if (ret)
goto err_remove_pkey;
}
list_add_tail(&p->kobj.entry, &device->port_list);
#ifdef __linux__
@ -557,6 +614,9 @@ static int add_port(struct ib_device *device, int port_num)
#endif
return 0;
err_remove_pkey:
sysfs_remove_group(&p->kobj, &p->pkey_group);
err_free_pkey:
for (i = 0; i < attr.pkey_tbl_len; ++i)
kfree(p->pkey_group.attrs[i]);
@ -572,6 +632,9 @@ static int add_port(struct ib_device *device, int port_num)
kfree(p->gid_group.attrs);
err_remove_pma_ext:
sysfs_remove_group(&p->kobj, &pma_ext_group);
err_remove_pma:
sysfs_remove_group(&p->kobj, &pma_group);
@ -786,16 +849,17 @@ static struct attribute_group iw_stats_group = {
.attrs = iw_proto_stats_attrs,
};
int ib_device_register_sysfs(struct ib_device *device)
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *, u8, struct kobject *))
{
struct device *class_dev = &device->dev;
int ret;
int i;
class_dev->class = &ib_class;
class_dev->driver_data = device;
class_dev->parent = device->dma_device;
dev_set_name(class_dev, device->name);
dev_set_name(class_dev, device->name);
dev_set_drvdata(class_dev, device);
INIT_LIST_HEAD(&device->port_list);
@ -810,19 +874,19 @@ int ib_device_register_sysfs(struct ib_device *device)
}
device->ports_parent = kobject_create_and_add("ports",
&class_dev->kobj);
if (!device->ports_parent) {
kobject_get(&class_dev->kobj));
if (!device->ports_parent) {
ret = -ENOMEM;
goto err_put;
}
if (device->node_type == RDMA_NODE_IB_SWITCH) {
ret = add_port(device, 0);
ret = add_port(device, 0, port_callback);
if (ret)
goto err_put;
} else {
for (i = 1; i <= device->phys_port_cnt; ++i) {
ret = add_port(device, i);
ret = add_port(device, i, port_callback);
if (ret)
goto err_put;
}
@ -864,10 +928,15 @@ void ib_device_unregister_sysfs(struct ib_device *device)
{
struct kobject *p, *t;
struct ib_port *port;
int i;
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
device_remove_file(&device->dev, ib_class_attributes[i]);
}
list_for_each_entry_safe(p, t, &device->port_list, entry) {
list_del(&p->entry);
port = container_of(p, struct ib_port, kobj);
@ -891,7 +960,7 @@ void ib_sysfs_cleanup(void)
class_unregister(&ib_class);
}
int ib_sysfs_create_port_files(struct ib_device *device,
/*int ib_sysfs_create_port_files(struct ib_device *device,
int (*create)(struct ib_device *dev, u8 port_num,
struct kobject *kobj))
{
@ -908,4 +977,4 @@ int ib_sysfs_create_port_files(struct ib_device *device,
return ret;
}
EXPORT_SYMBOL(ib_sysfs_create_port_files);
EXPORT_SYMBOL(ib_sysfs_create_port_files);*/

View File

@ -312,7 +312,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&ucontext->qp_list);
INIT_LIST_HEAD(&ucontext->srq_list);
INIT_LIST_HEAD(&ucontext->ah_list);
INIT_LIST_HEAD(&ucontext->xrc_domain_list);
INIT_LIST_HEAD(&ucontext->xrcd_list);
ucontext->closing = 0;
resp.num_comp_vectors = file->device->num_comp_vectors;
@ -633,7 +633,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
cmd.access_flags, &udata, 0);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@ -1087,7 +1087,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.srq = srq;
attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
attr.qp_type = cmd.qp_type;
attr.xrc_domain = xrcd;
attr.xrcd = xrcd;
attr.create_flags = 0;
attr.cap.max_send_wr = cmd.max_send_wr;
@ -1115,14 +1115,14 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
qp->xrcd = attr.xrc_domain;
qp->xrcd = attr.xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&attr.send_cq->usecnt);
atomic_inc(&attr.recv_cq->usecnt);
if (attr.srq)
atomic_inc(&attr.srq->usecnt);
else if (attr.xrc_domain)
atomic_inc(&attr.xrc_domain->usecnt);
else if (attr.xrcd)
atomic_inc(&attr.xrcd->usecnt);
obj->uevent.uobject.object = qp;
ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@ -2032,8 +2032,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
srq->uobject = &obj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
srq->xrc_cq = NULL;
srq->xrcd = NULL;
srq->ext.xrc.cq = NULL;
srq->ext.xrc.xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
@ -2083,7 +2083,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len)
{
struct ib_uverbs_create_xrc_srq cmd;
struct ib_uverbs_create_xsrq cmd;
struct ib_uverbs_create_srq_resp resp;
struct ib_udata udata;
struct ib_uevent_object *obj;
@ -2119,7 +2119,7 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
goto err;
}
xrc_cq = idr_read_cq(cmd.xrc_cq, file->ucontext, 0);
xrc_cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
if (!xrc_cq) {
ret = -EINVAL;
goto err_put_pd;
@ -2152,8 +2152,8 @@ ssize_t ib_uverbs_create_xrc_srq(struct ib_uverbs_file *file,
srq->uobject = &obj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
srq->xrc_cq = xrc_cq;
srq->xrcd = xrcd;
srq->ext.xrc.cq = xrc_cq;
srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&xrc_cq->usecnt);
atomic_inc(&xrcd->usecnt);
@ -2528,7 +2528,7 @@ ssize_t ib_uverbs_open_xrc_domain(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&xrcd_uobj->xrc_reg_qp_list);
mutex_lock(&file->mutex);
list_add_tail(&uobj->list, &file->ucontext->xrc_domain_list);
list_add_tail(&uobj->list, &file->ucontext->xrcd_list);
mutex_unlock(&file->mutex);
uobj->live = 1;
@ -2598,7 +2598,7 @@ ssize_t ib_uverbs_close_xrc_domain(struct ib_uverbs_file *file,
if (!ret) {
list_for_each_entry(t_uobj, &file->ucontext->srq_list, list) {
struct ib_srq *srq = t_uobj->object;
if (srq->xrcd && srq->xrcd == uobj->object) {
if (srq->ext.xrc.xrcd && srq->ext.xrc.xrcd == uobj->object) {
ret = -EBUSY;
break;
}
@ -2702,7 +2702,7 @@ ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
init_attr.sq_sig_type =
cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_XRC;
init_attr.xrc_domain = xrcd;
init_attr.xrcd = xrcd;
init_attr.cap.max_send_wr = 1;
init_attr.cap.max_recv_wr = 0;

View File

@ -110,8 +110,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
[IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq,
[IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = ib_uverbs_open_xrc_domain,
[IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = ib_uverbs_close_xrc_domain,
[IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrc_domain,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrc_domain,
[IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp,
[IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp,
[IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp,
@ -258,7 +258,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
}
mutex_lock(&file->device->ib_dev->xrcd_table_mutex);
list_for_each_entry_safe(uobj, tmp, &context->xrc_domain_list, list) {
list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
struct ib_xrcd *xrcd = uobj->object;
struct ib_uxrc_rcv_object *xrc_qp_obj, *tmp1;
struct ib_uxrcd_object *xrcd_uobj =
@ -629,8 +629,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (hdr.in_words * 4 != count)
return -EINVAL;
if (hdr.command < 0 ||
hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
!uverbs_cmd_table[hdr.command] ||
!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
return -EINVAL;

View File

@ -250,8 +250,8 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->xrc_cq = NULL;
srq->xrcd = NULL;
srq->ext.xrc.cq = NULL;
srq->ext.xrc.xrcd = NULL;
atomic_inc(&pd->usecnt);
atomic_set(&srq->usecnt, 0);
}
@ -278,8 +278,8 @@ struct ib_srq *ib_create_xrc_srq(struct ib_pd *pd,
srq->uobject = NULL;
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->xrc_cq = xrc_cq;
srq->xrcd = xrcd;
srq->ext.xrc.cq = xrc_cq;
srq->ext.xrc.xrcd = xrcd;
atomic_inc(&pd->usecnt);
atomic_inc(&xrcd->usecnt);
atomic_inc(&xrc_cq->usecnt);
@ -319,8 +319,8 @@ int ib_destroy_srq(struct ib_srq *srq)
return -EBUSY;
pd = srq->pd;
xrc_cq = srq->xrc_cq;
xrcd = srq->xrcd;
xrc_cq = srq->ext.xrc.cq;
xrcd = srq->ext.xrc.xrcd;
ret = srq->device->destroy_srq(srq);
if (!ret) {
@ -355,7 +355,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->qp_context = qp_init_attr->qp_context;
qp->qp_type = qp_init_attr->qp_type;
qp->xrcd = qp->qp_type == IB_QPT_XRC ?
qp_init_attr->xrc_domain : NULL;
qp_init_attr->xrcd : NULL;
atomic_inc(&pd->usecnt);
atomic_inc(&qp_init_attr->send_cq->usecnt);
atomic_inc(&qp_init_attr->recv_cq->usecnt);
@ -371,8 +371,8 @@ EXPORT_SYMBOL(ib_create_qp);
static const struct {
int valid;
enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETH + 1];
enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETH + 1];
enum ib_qp_attr_mask req_param[IB_QPT_RAW_PACKET + 1];
enum ib_qp_attr_mask opt_param[IB_QPT_RAW_PACKET + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = {
[IB_QPS_RESET] = { .valid = 1 },
@ -382,7 +382,7 @@ static const struct {
[IB_QPT_UD] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_QKEY),
[IB_QPT_RAW_ETH] = IB_QP_PORT,
[IB_QPT_RAW_PACKET] = IB_QP_PORT,
[IB_QPT_UC] = (IB_QP_PKEY_INDEX |
IB_QP_PORT |
IB_QP_ACCESS_FLAGS),
@ -1005,7 +1005,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
if (qp->qp_type == IB_QPT_RAW_ETH) {
if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
@ -1013,7 +1013,7 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
if (qp->qp_type != IB_QPT_RAW_ETH)
if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}
@ -1028,7 +1028,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
switch (rdma_node_get_transport(qp->device->node_type)) {
case RDMA_TRANSPORT_IB:
if (qp->qp_type == IB_QPT_RAW_ETH) {
if (qp->qp_type == IB_QPT_RAW_PACKET) {
/* In raw Etherent mgids the 63 msb's should be 0 */
if (gid->global.subnet_prefix & cpu_to_be64(~1ULL))
return -EINVAL;
@ -1036,7 +1036,7 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
return -EINVAL;
break;
case RDMA_TRANSPORT_IWARP:
if (qp->qp_type != IB_QPT_RAW_ETH)
if (qp->qp_type != IB_QPT_RAW_PACKET)
return -EINVAL;
break;
}

View File

@ -1,5 +1,7 @@
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI
select NET_VENDOR_MELLANOX
select MLX4_CORE
---help---
This driver provides low-level InfiniBand support for

View File

@ -1,4 +1,31 @@
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
# $FreeBSD$
#.PATH: ${.CURDIR}/../../ofed/drivers/infiniband/hw/mlx4
#.PATH: ${.CURDIR}/../../../../include/linux
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
mlx4_ib-y += wc.o
.include <bsd.own.mk>
KMOD = mlx4ib
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
#SRCS+= linux_compat.c linux_radix.c
SRCS+= ah.c cq.c doorbell.c mad.c main.c mr.c qp.c srq.c wc.c
SRCS+= opt_inet.h opt_inet6.h
#CFLAGS+= -I${.CURDIR}/../../ofed/include/
CFLAGS+= -I${.CURDIR}/../../../../include
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
@echo "#define INET 1" > ${.TARGET}
.endif
.if ${MK_INET6_SUPPORT} != "no"
opt_inet6.h:
@echo "#define INET6 1" > ${.TARGET}
.endif
.endif
.include <bsd.kmod.mk>
CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions

View File

@ -30,25 +30,25 @@
* SOFTWARE.
*/
#include "mlx4_ib.h"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/slab.h>
#include <linux/inet.h>
#include <linux/string.h>
#include <rdma/ib_cache.h>
#include "mlx4_ib.h"
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port)
{
struct mlx4_ib_iboe *iboe = &dev->iboe;
struct in6_addr in6;
*is_mcast = 0;
spin_lock(&iboe->lock);
if (!iboe->netdevs[port - 1]) {
spin_unlock(&iboe->lock);
return -EINVAL;
}
spin_unlock(&iboe->lock);
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
if (rdma_link_local_addr(&in6))
@ -92,15 +92,15 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
}
static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
struct mlx4_dev *dev = ibdev->dev;
union ib_gid sgid;
u8 mac[6];
int err;
int is_mcast;
u16 vlan_tag;
union ib_gid sgid;
err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
if (err)
@ -130,7 +130,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah->av.ib.dlid = cpu_to_be16(0xc000);
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
return &ah->ibah;
}
@ -147,25 +147,24 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
if (!(ah_attr->ah_flags & IB_AH_GRH)) {
ret = ERR_PTR(-EINVAL);
goto out;
} else {
/* TBD: need to handle the case when we get called
in an atomic context and there we might sleep. We
don't expect this currently since we're working with
link local addresses which we can translate without
going to sleep */
/*
* TBD: need to handle the case when we get
* called in an atomic context and there we
* might sleep. We don't expect this
* currently since we're working with link
* local addresses which we can translate
* without going to sleep.
*/
ret = create_iboe_ah(pd, ah_attr, ah);
if (IS_ERR(ret))
goto out;
else
return ret;
}
if (IS_ERR(ret))
kfree(ah);
return ret;
} else
return create_ib_ah(pd, ah_attr, ah); /* never fails */
out:
kfree(ah);
return ret;
}
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
@ -202,4 +201,3 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah)
kfree(to_mah(ah));
return 0;
}

View File

@ -0,0 +1,688 @@
/*
* Copyright (c) 2012 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/***********************************************************/
/*This file support the handling of the Alias GUID feature. */
/***********************************************************/
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_pack.h>
#include <linux/mlx4/cmd.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
#include <linux/delay.h>
#include "mlx4_ib.h"
/*
The driver keeps the current state of all guids, as they are in the HW.
Whenever we receive an smp mad GUIDInfo record, the data will be cached.
*/
struct mlx4_alias_guid_work_context {
u8 port;
struct mlx4_ib_dev *dev ;
struct ib_sa_query *sa_query;
struct completion done;
int query_id;
struct list_head list;
int block_num;
};
struct mlx4_next_alias_guid_work {
u8 port;
u8 block_num;
struct mlx4_sriov_alias_guid_info_rec_det rec_det;
};
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
u8 port_num, u8 *p_data)
{
int i;
u64 guid_indexes;
int slave_id;
int port_index = port_num - 1;
if (!mlx4_is_master(dev->dev))
return;
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* The location of the specific index starts from bit number 4
* until bit num 11 */
if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
if (slave_id >= dev->dev->num_slaves) {
pr_debug("The last slave: %d\n", slave_id);
return;
}
/* cache the guid: */
memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
&p_data[i * GUID_REC_SIZE],
GUID_REC_SIZE);
} else
pr_debug("Guid number: %d in block: %d"
" was not updated\n", i, block_num);
}
}
static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
{
if (index >= NUM_ALIAS_GUID_PER_PORT) {
pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
return (__force __be64) -1;
}
return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
}
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
{
return IB_SA_COMP_MASK(4 + index);
}
/*
* Whenever new GUID is set/unset (guid table change) create event and
* notify the relevant slave (master also should be notified).
* If the GUID value is not as we have in the cache the slave will not be
* updated; in this case it waits for the smp_snoop or the port management
* event to call the function and to update the slave.
* block_number - the index of the block (16 blocks available)
* port_number - 1 or 2
*/
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
int block_num, u8 port_num,
u8 *p_data)
{
int i;
u64 guid_indexes;
int slave_id;
enum slave_port_state new_state;
enum slave_port_state prev_state;
__be64 tmp_cur_ag, form_cache_ag;
enum slave_port_gen_event gen_event;
if (!mlx4_is_master(dev->dev))
return;
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
/*calculate the slaves and notify them*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* the location of the specific index runs from bits 4..11 */
if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
if (slave_id >= dev->dev->num_slaves)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
(NUM_ALIAS_GUID_IN_REC * block_num) + i);
/*
* Check if guid is not the same as in the cache,
* If it is different, wait for the snoop_smp or the port mgmt
* change event to update the slave on its port state change
*/
if (tmp_cur_ag != form_cache_ag)
continue;
mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
/*2 cases: Valid GUID, and Invalid Guid*/
if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
&gen_event);
pr_debug("slave: %d, port: %d prev_port_state: %d,"
" new_port_state: %d, gen_event: %d\n",
slave_id, port_num, prev_state, new_state, gen_event);
if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
}
} else { /* request to invalidate GUID */
set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
&gen_event);
pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
MLX4_PORT_CHANGE_SUBTYPE_DOWN);
}
}
}
static void aliasguid_query_handler(int status,
struct ib_sa_guidinfo_rec *guid_rec,
void *context)
{
struct mlx4_ib_dev *dev;
struct mlx4_alias_guid_work_context *cb_ctx = context;
u8 port_index ;
int i;
struct mlx4_sriov_alias_guid_info_rec_det *rec;
unsigned long flags, flags1;
if (!context)
return;
dev = cb_ctx->dev;
port_index = cb_ctx->port - 1;
rec = &dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[cb_ctx->block_num];
if (status) {
rec->status = MLX4_GUID_INFO_STATUS_IDLE;
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
goto out;
}
if (guid_rec->block_num != cb_ctx->block_num) {
pr_err("block num mismatch: %d != %d\n",
cb_ctx->block_num, guid_rec->block_num);
goto out;
}
pr_debug("lid/port: %d/%d, block_num: %d\n",
be16_to_cpu(guid_rec->lid), cb_ctx->port,
guid_rec->block_num);
rec = &dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[guid_rec->block_num];
rec->status = MLX4_GUID_INFO_STATUS_SET;
rec->method = MLX4_GUID_INFO_RECORD_SET;
for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
__be64 tmp_cur_ag;
tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
/* check if the SM didn't assign one of the records.
* if it didn't, if it was not sysadmin request:
* ask the SM to give a new GUID, (instead of the driver request).
*/
if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
"block_num: %d was declined by SM, "
"ownership by %d (0 = driver, 1=sysAdmin,"
" 2=None)\n", __func__, i,
guid_rec->block_num, rec->ownership);
if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
/* if it is driver assign, asks for new GUID from SM*/
*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
MLX4_NOT_SET_GUID;
/* Mark the record as not assigned, and let it
* be sent again in the next work sched.*/
rec->status = MLX4_GUID_INFO_STATUS_IDLE;
rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
} else {
/* properly assigned record. */
/* We save the GUID we just got from the SM in the
* admin_guid in order to be persistent, and in the
* request from the sm the process will ask for the same GUID */
if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
/* the sysadmin assignment failed.*/
mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
" admin guid after SysAdmin "
"configuration. "
"Record num %d in block_num:%d "
"was declined by SM, "
"new val(0x%llx) was kept\n",
__func__, i,
guid_rec->block_num,
(long long)be64_to_cpu(*(__be64 *) &
rec->all_recs[i * GUID_REC_SIZE]));
} else {
memcpy(&rec->all_recs[i * GUID_REC_SIZE],
&guid_rec->guid_info_list[i * GUID_REC_SIZE],
GUID_REC_SIZE);
}
}
}
/*
The func is call here to close the cases when the
sm doesn't send smp, so in the sa response the driver
notifies the slave.
*/
mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
cb_ctx->port,
guid_rec->guid_info_list);
out:
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down)
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
&dev->sriov.alias_guid.ports_guid[port_index].
alias_guid_work, 0);
if (cb_ctx->sa_query) {
list_del(&cb_ctx->list);
kfree(cb_ctx);
} else
complete(&cb_ctx->done);
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
{
int i;
u64 cur_admin_val;
ib_sa_comp_mask comp_mask = 0;
dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
= MLX4_GUID_INFO_STATUS_IDLE;
dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
= MLX4_GUID_INFO_RECORD_SET;
/* calculate the comp_mask for that record.*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
cur_admin_val =
*(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
/*
check the admin value: if it's for delete (~00LL) or
it is the first guid of the first record (hw guid) or
the records is not in ownership of the sysadmin and the sm doesn't
need to assign GUIDs, then don't put it up for assignment.
*/
if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
(!index && !i) ||
MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
ports_guid[port - 1].all_rec_per_port[index].ownership)
continue;
comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].guid_indexes = comp_mask;
}
static int set_guid_rec(struct ib_device *ibdev,
u8 port, int index,
struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
{
int err;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_sa_guidinfo_rec guid_info_rec;
ib_sa_comp_mask comp_mask;
struct ib_port_attr attr;
struct mlx4_alias_guid_work_context *callback_context;
unsigned long resched_delay, flags, flags1;
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
if (err) {
pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
err, port);
return err;
}
/*check the port was configured by the sm, otherwise no need to send */
if (attr.state != IB_PORT_ACTIVE) {
pr_debug("port %d not active...rescheduling\n", port);
resched_delay = 5 * HZ;
err = -EAGAIN;
goto new_schedule;
}
callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
if (!callback_context) {
err = -ENOMEM;
resched_delay = HZ * 5;
goto new_schedule;
}
callback_context->port = port;
callback_context->dev = dev;
callback_context->block_num = index;
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
guid_info_rec.lid = cpu_to_be16(attr.lid);
guid_info_rec.block_num = index;
memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
rec_det->guid_indexes;
init_completion(&callback_context->done);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
list_add_tail(&callback_context->list, head);
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
callback_context->query_id =
ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
ibdev, port, &guid_info_rec,
comp_mask, rec_det->method, 1000,
GFP_KERNEL, aliasguid_query_handler,
callback_context,
&callback_context->sa_query);
if (callback_context->query_id < 0) {
pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
"%d. will reschedule to the next 1 sec.\n",
callback_context->query_id);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
list_del(&callback_context->list);
kfree(callback_context);
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
resched_delay = 1 * HZ;
err = -EAGAIN;
goto new_schedule;
}
err = 0;
goto out;
new_schedule:
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
invalidate_guid_record(dev, port, index);
if (!dev->sriov.is_going_down) {
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
&dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
resched_delay);
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
out:
return err;
}
void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
{
int i;
unsigned long flags, flags1;
pr_debug("port %d\n", port);
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
invalidate_guid_record(dev, port, i);
if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
/*
make sure no work waits in the queue, if the work is already
queued(not on the timer) the cancel will fail. That is not a problem
because we just want the work started.
*/
cancel_delayed_work(&dev->sriov.alias_guid.
ports_guid[port - 1].alias_guid_work);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
&dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
0);
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
/* The function returns the next record that was
* not configured (or failed to be configured) */
static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
struct mlx4_next_alias_guid_work *rec)
{
int j;
unsigned long flags;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
MLX4_GUID_INFO_STATUS_IDLE) {
memcpy(&rec->rec_det,
&dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
rec->port = port;
rec->block_num = j;
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
MLX4_GUID_INFO_STATUS_PENDING;
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
return 0;
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
}
return -ENOENT;
}
static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
int rec_index,
struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
{
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
rec_det->guid_indexes;
memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
rec_det->status;
}
static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
{
int j;
struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
IB_SA_GUIDINFO_REC_GID7;
rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
set_administratively_guid_record(dev, port, j, &rec_det);
}
}
static void alias_guid_work(struct work_struct *work)
{
struct delayed_work *delay = to_delayed_work(work);
int ret = 0;
struct mlx4_next_alias_guid_work *rec;
struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
alias_guid_work);
struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
struct mlx4_ib_sriov,
alias_guid);
struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
rec = kzalloc(sizeof *rec, GFP_KERNEL);
if (!rec) {
pr_err("alias_guid_work: No Memory\n");
return;
}
pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
if (ret) {
pr_debug("No more records to update.\n");
goto out;
}
set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
&rec->rec_det);
out:
kfree(rec);
}
void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
{
unsigned long flags, flags1;
if (!mlx4_is_master(dev->dev))
return;
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down) {
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
&dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
{
int i;
struct mlx4_ib_sriov *sriov = &dev->sriov;
struct mlx4_alias_guid_work_context *cb_ctx;
struct mlx4_sriov_alias_guid_port_rec_det *det;
struct ib_sa_query *sa_query;
unsigned long flags;
for (i = 0 ; i < dev->num_ports; i++) {
cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
det = &sriov->alias_guid.ports_guid[i];
spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
while (!list_empty(&det->cb_list)) {
cb_ctx = list_entry(det->cb_list.next,
struct mlx4_alias_guid_work_context,
list);
sa_query = cb_ctx->sa_query;
cb_ctx->sa_query = NULL;
list_del(&cb_ctx->list);
spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
ib_sa_cancel_query(cb_ctx->query_id, sa_query);
wait_for_completion(&cb_ctx->done);
kfree(cb_ctx);
spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
}
spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
}
for (i = 0 ; i < dev->num_ports; i++) {
flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
}
ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
kfree(dev->sriov.alias_guid.sa_client);
}
int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
{
char alias_wq_name[15];
int ret = 0;
int i, j, k;
union ib_gid gid;
if (!mlx4_is_master(dev->dev))
return 0;
dev->sriov.alias_guid.sa_client =
kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
if (!dev->sriov.alias_guid.sa_client)
return -ENOMEM;
ib_sa_register_client(dev->sriov.alias_guid.sa_client);
spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
for (i = 1; i <= dev->num_ports; ++i) {
if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
ret = -EFAULT;
goto err_unregister;
}
}
for (i = 0 ; i < dev->num_ports; i++) {
memset(&dev->sriov.alias_guid.ports_guid[i], 0,
sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
/*Check if the SM doesn't need to assign the GUIDs*/
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
if (mlx4_ib_sm_guid_assign) {
dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].
ownership = MLX4_GUID_DRIVER_ASSIGN;
continue;
}
dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
ownership = MLX4_GUID_NONE_ASSIGN;
/*mark each val as it was deleted,
till the sysAdmin will give it valid val*/
for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
*(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
}
}
INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
/*prepare the records, set them to be allocated by sm*/
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
invalidate_guid_record(dev, i + 1, j);
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i;
if (mlx4_ib_sm_guid_assign)
set_all_slaves_guids(dev, i);
snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq =
create_singlethread_workqueue(alias_wq_name);
if (!dev->sriov.alias_guid.ports_guid[i].wq) {
ret = -ENOMEM;
goto err_thread;
}
INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
alias_guid_work);
}
return 0;
err_thread:
for (--i; i >= 0; i--) {
destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
dev->sriov.alias_guid.ports_guid[i].wq = NULL;
}
err_unregister:
ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
kfree(dev->sriov.alias_guid.sa_client);
dev->sriov.alias_guid.sa_client = NULL;
pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
return ret;
}

View File

@ -0,0 +1,440 @@
/*
* Copyright (c) 2012 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_mad.h>
#include <linux/mlx4/cmd.h>
#include <linux/idr.h>
#include <rdma/ib_cm.h>
#include "mlx4_ib.h"
#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
struct id_map_entry {
struct rb_node node;
u32 sl_cm_id;
u32 pv_cm_id;
int slave_id;
int scheduled_delete;
struct mlx4_ib_dev *dev;
struct list_head list;
struct delayed_work timeout;
};
struct cm_generic_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
};
struct cm_req_msg {
unsigned char unused[0x60];
union ib_gid primary_path_sgid;
};
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->local_comm_id = cpu_to_be32(cm_id);
}
static u32 get_local_comm_id(struct ib_mad *mad)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->local_comm_id);
}
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->remote_comm_id = cpu_to_be32(cm_id);
}
static u32 get_remote_comm_id(struct ib_mad *mad)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->remote_comm_id);
}
static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
{
struct cm_req_msg *msg = (struct cm_req_msg *)mad;
return msg->primary_path_sgid;
}
/* Lock should be taken before called */
static struct id_map_entry *
id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
struct rb_node *node = sl_id_map->rb_node;
while (node) {
struct id_map_entry *id_map_entry =
rb_entry(node, struct id_map_entry, node);
if (id_map_entry->sl_cm_id > sl_cm_id)
node = node->rb_left;
else if (id_map_entry->sl_cm_id < sl_cm_id)
node = node->rb_right;
else if (id_map_entry->slave_id > slave_id)
node = node->rb_left;
else if (id_map_entry->slave_id < slave_id)
node = node->rb_right;
else
return id_map_entry;
}
return NULL;
}
static void id_map_ent_timeout(struct work_struct *work)
{
struct delayed_work *delay = to_delayed_work(work);
struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
struct id_map_entry *db_ent, *found_ent;
struct mlx4_ib_dev *dev = ent->dev;
struct mlx4_ib_sriov *sriov = &dev->sriov;
struct rb_root *sl_id_map = &sriov->sl_id_map;
int pv_id = (int) ent->pv_cm_id;
spin_lock(&sriov->id_map_lock);
db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
if (!db_ent)
goto out;
found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
if (found_ent && found_ent == ent)
rb_erase(&found_ent->node, sl_id_map);
idr_remove(&sriov->pv_id_table, pv_id);
out:
list_del(&ent->list);
spin_unlock(&sriov->id_map_lock);
kfree(ent);
}
static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
{
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
struct rb_root *sl_id_map = &sriov->sl_id_map;
struct id_map_entry *ent, *found_ent;
spin_lock(&sriov->id_map_lock);
ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
if (!ent)
goto out;
found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
if (found_ent && found_ent == ent)
rb_erase(&found_ent->node, sl_id_map);
idr_remove(&sriov->pv_id_table, pv_cm_id);
out:
spin_unlock(&sriov->id_map_lock);
}
static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
struct id_map_entry *ent;
int slave_id = new->slave_id;
int sl_cm_id = new->sl_cm_id;
ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
if (ent) {
pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
sl_cm_id);
rb_replace_node(&ent->node, &new->node, sl_id_map);
return;
}
/* Go to the bottom of the tree */
while (*link) {
parent = *link;
ent = rb_entry(parent, struct id_map_entry, node);
if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
}
rb_link_node(&new->node, parent, link);
rb_insert_color(&new->node, sl_id_map);
}
static struct id_map_entry *
id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
{
int ret, id;
static int next_id;
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
if (!ent) {
mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
return ERR_PTR(-ENOMEM);
}
ent->sl_cm_id = sl_cm_id;
ent->slave_id = slave_id;
ent->scheduled_delete = 0;
ent->dev = to_mdev(ibdev);
INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
do {
spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
ret = idr_get_new_above(&sriov->pv_id_table, ent,
next_id, &id);
if (!ret) {
next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
ent->pv_cm_id = (u32)id;
sl_id_map_add(ibdev, ent);
}
spin_unlock(&sriov->id_map_lock);
} while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
/*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
if (!ret) {
spin_lock(&sriov->id_map_lock);
list_add_tail(&ent->list, &sriov->cm_list);
spin_unlock(&sriov->id_map_lock);
return ent;
}
/*error flow*/
kfree(ent);
mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
return ERR_PTR(-ENOMEM);
}
static struct id_map_entry *
id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
{
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
spin_lock(&sriov->id_map_lock);
if (*pv_cm_id == -1) {
ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
if (ent)
*pv_cm_id = (int) ent->pv_cm_id;
} else
ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
spin_unlock(&sriov->id_map_lock);
return ent;
}
static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
{
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
unsigned long flags;
spin_lock(&sriov->id_map_lock);
spin_lock_irqsave(&sriov->going_down_lock, flags);
/*make sure that there is no schedule inside the scheduled work.*/
if (!sriov->is_going_down) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
}
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad)
{
struct id_map_entry *id;
u32 sl_cm_id;
int pv_cm_id = -1;
sl_cm_id = get_local_comm_id(mad);
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
__func__, slave_id, sl_cm_id);
return PTR_ERR(id);
}
} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
return 0;
} else {
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
}
if (!id) {
pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
slave_id, sl_cm_id);
return -EINVAL;
}
set_local_comm_id(mad, id->pv_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
id_map_find_del(ibdev, pv_cm_id);
return 0;
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad, int is_eth)
{
u32 pv_cm_id;
struct id_map_entry *id;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
union ib_gid gid;
if (is_eth)
return 0;
gid = gid_from_req_msg(ibdev, mad);
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) {
mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
gid.global.interface_id);
return -ENOENT;
}
return 0;
}
pv_cm_id = get_remote_comm_id(mad);
id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
if (!id) {
pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
return -ENOENT;
}
if (!is_eth)
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
id_map_find_del(ibdev, (int) pv_cm_id);
}
return 0;
}
void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
{
spin_lock_init(&dev->sriov.id_map_lock);
INIT_LIST_HEAD(&dev->sriov.cm_list);
dev->sriov.sl_id_map = RB_ROOT;
idr_init(&dev->sriov.pv_id_table);
idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
}
/* slave = -1 ==> all slaves */
/* TBD -- call paravirt clean for single slave. Need for slave RESET event */
void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
{
struct mlx4_ib_sriov *sriov = &dev->sriov;
struct rb_root *sl_id_map = &sriov->sl_id_map;
struct list_head lh;
struct rb_node *nd;
int need_flush = 1;
struct id_map_entry *map, *tmp_map;
/* cancel all delayed work queue entries */
INIT_LIST_HEAD(&lh);
spin_lock(&sriov->id_map_lock);
list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
if (slave < 0 || slave == map->slave_id) {
if (map->scheduled_delete)
need_flush &= !!cancel_delayed_work(&map->timeout);
}
}
spin_unlock(&sriov->id_map_lock);
if (!need_flush)
flush_scheduled_work(); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
spin_lock(&sriov->id_map_lock);
if (slave < 0) {
while (rb_first(sl_id_map)) {
struct id_map_entry *ent =
rb_entry(rb_first(sl_id_map),
struct id_map_entry, node);
rb_erase(&ent->node, sl_id_map);
idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
}
list_splice_init(&dev->sriov.cm_list, &lh);
} else {
/* first, move nodes belonging to slave to db remove list */
nd = rb_first(sl_id_map);
while (nd) {
struct id_map_entry *ent =
rb_entry(nd, struct id_map_entry, node);
nd = rb_next(nd);
if (ent->slave_id == slave)
list_move_tail(&ent->list, &lh);
}
/* remove those nodes from databases */
list_for_each_entry_safe(map, tmp_map, &lh, list) {
rb_erase(&map->node, sl_id_map);
idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
}
/* add remaining nodes from cm_list */
list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
if (slave == map->slave_id)
list_move_tail(&map->list, &lh);
}
}
spin_unlock(&sriov->id_map_lock);
/* free any map entries left behind due to cancel_delayed_work above */
list_for_each_entry_safe(map, tmp_map, &lh, list) {
list_del(&map->list);
kfree(map);
}
}

View File

@ -33,13 +33,14 @@
#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
/* Which firmware version adds support for Resize CQ */
#define MLX4_FW_VER_RESIZE_CQ mlx4_fw_ver(2, 5, 0)
#define MLX4_FW_VER_IGNORE_OVERRUN_CQ mlx4_fw_ver(2, 7, 8200)
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
@ -53,7 +54,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
struct ib_cq *ibcq;
if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
pr_warn("Unexpected event type %d "
"on CQ %06x\n", type, cq->cqn);
return;
}
@ -69,7 +70,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
{
return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
}
static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@ -80,8 +81,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
{
struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
}
@ -102,12 +104,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
{
int err;
err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
PAGE_SIZE * 2, &buf->buf);
if (err)
goto out;
buf->entry_size = dev->dev->caps.cqe_size;
err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
&buf->mtt);
if (err)
@ -123,8 +126,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
mlx4_mtt_cleanup(dev->dev, &buf->mtt);
err_buf:
mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
&buf->buf);
mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
out:
return err;
@ -132,7 +134,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
{
mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@ -140,14 +142,19 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
u64 buf_addr, int cqe)
{
int err;
int cqe_size = dev->dev->caps.cqe_size;
int shift;
int n;
*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
ilog2((*umem)->page_size), &buf->mtt);
n = ib_umem_page_count(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
if (err)
goto err_buf;
@ -175,12 +182,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
struct mlx4_uar *uar;
int err;
if (entries < 1 || entries > dev->dev->caps.max_cqes) {
mlx4_ib_dbg("invalid num of entries: %d", entries);
if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
}
cq = kzalloc(sizeof *cq, GFP_KERNEL);
cq = kmalloc(sizeof *cq, GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@ -227,10 +232,11 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
uar = &dev->priv_uar;
}
if (dev->eq_table)
vector = dev->eq_table[vector % ibdev->num_comp_vectors];
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
cq->db.dma, &cq->mcq,
vector == IB_CQ_VECTOR_LEAST_ATTACHED ?
MLX4_LEAST_ATTACHED_VECTOR : vector, 0);
cq->db.dma, &cq->mcq, vector, 0, 0);
if (err)
goto err_dbmap;
@ -335,16 +341,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
int cqe_size = cq->buf.entry_size;
int cqe_inc = cqe_size == 64 ? 1 : 0;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
cqe += cqe_inc;
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
(i + 1) & cq->resize_buf->cqe);
memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
new_cqe += cqe_inc;
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
cqe += cqe_inc;
}
++cq->mcq.cons_index;
}
@ -409,7 +422,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
} else {
struct mlx4_ib_cq_buf tmp_buf;
int tmp_cqe = 0;
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
mlx4_ib_cq_resize_copy_cqes(cq);
@ -445,9 +458,21 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
out:
mutex_unlock(&cq->resize_mutex);
return err;
}
int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq)
{
struct mlx4_ib_dev *dev = to_mdev(ibcq->device);
struct mlx4_ib_cq *cq = to_mcq(ibcq);
if (dev->dev->caps.fw_ver < MLX4_FW_VER_IGNORE_OVERRUN_CQ)
return -ENOSYS;
return mlx4_cq_ignore_overrun(dev->dev, &cq->mcq);
}
int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
@ -473,7 +498,7 @@ static void dump_cqe(void *cqe)
{
__be32 *buf = cqe;
printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
@ -483,7 +508,7 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
struct ib_wc *wc)
{
if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
printk(KERN_DEBUG "local QP operation err "
pr_debug("local QP operation err "
"(QPN %06x, WQE index %x, vendor syndrome %02x, "
"opcode = %02x)\n",
be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
@ -554,6 +579,26 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
checksum == cpu_to_be16(0xffff);
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
unsigned tail, struct mlx4_cqe *cqe)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
ib_dma_sync_single_for_cpu(qp->ibqp.device,
qp->sqp_proxy_rcv[tail].map,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
wc->dlid_path_bits = 0;
return 0;
}
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_qp **cur_qp,
struct ib_wc *wc)
@ -562,18 +607,20 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
struct mlx4_ib_srq *srq;
struct mlx4_srq *msrq;
int is_send;
int is_error;
u32 g_mlpath_rqpn;
int is_xrc_recv = 0;
u16 wqe_ctr;
unsigned tail = 0;
repoll:
cqe = next_cqe_sw(cq);
if (!cqe)
return -EAGAIN;
if (cq->buf.entry_size == 64)
cqe++;
++cq->mcq.cons_index;
/*
@ -588,7 +635,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
is_send)) {
printk(KERN_WARNING "Completion for NOP opcode detected!\n");
pr_warn("Completion for NOP opcode detected!\n");
return -EINVAL;
}
@ -608,24 +655,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
goto repoll;
}
if ((be32_to_cpu(cqe->vlan_my_qpn) & (1 << 23)) && !is_send) {
/*
* We do not have to take the XRC SRQ table lock here,
* because CQs will be locked while XRC SRQs are removed
* from the table.
*/
msrq = __mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->g_mlpath_rqpn) &
0xffffff);
if (unlikely(!msrq)) {
printk(KERN_WARNING "CQ %06x with entry for unknown "
"XRC SRQ %06x\n", cq->mcq.cqn,
be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff);
return -EINVAL;
}
is_xrc_recv = 1;
srq = to_mibsrq(msrq);
} else if (!*cur_qp ||
if (!*cur_qp ||
(be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) {
/*
* We do not have to take the QP table lock here,
@ -635,7 +665,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->vlan_my_qpn));
if (unlikely(!mqp)) {
printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
pr_warn("CQ %06x with entry for unknown QPN %06x\n",
cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
return -EINVAL;
}
@ -643,7 +673,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
*cur_qp = to_mibqp(mqp);
}
wc->qp = is_xrc_recv ? NULL: &(*cur_qp)->ibqp;
wc->qp = &(*cur_qp)->ibqp;
if (is_send) {
wq = &(*cur_qp)->sq;
@ -653,10 +683,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
}
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
++wq->tail;
} else if (is_xrc_recv) {
wqe_ctr = be16_to_cpu(cqe->wqe_index);
wc->wr_id = srq->wrid[wqe_ctr];
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else if ((*cur_qp)->ibqp.srq) {
srq = to_msrq((*cur_qp)->ibqp.srq);
wqe_ctr = be16_to_cpu(cqe->wqe_index);
@ -664,7 +690,8 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
tail = wq->tail & (wq->wqe_cnt - 1);
wc->wr_id = wq->wrid[tail];
++wq->tail;
}
@ -747,14 +774,26 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
break;
}
if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
}
wc->slid = be16_to_cpu(cqe->rlid);
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (rdma_port_get_link_layer(wc->qp->device,
(*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
else
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
}
return 0;
@ -776,8 +815,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
break;
}
if (npolled)
mlx4_cq_set_ci(&cq->mcq);
mlx4_cq_set_ci(&cq->mcq);
spin_unlock_irqrestore(&cq->lock, flags);
@ -804,10 +842,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
int nfreed = 0;
struct mlx4_cqe *cqe, *dest;
u8 owner_bit;
int is_xrc_srq = 0;
if (srq && srq->ibsrq.xrc_cq)
is_xrc_srq = 1;
int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
/*
* First we need to find the current producer index, so we
@ -826,15 +861,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
*/
while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
if (((be32_to_cpu(cqe->vlan_my_qpn) & 0xffffff) == qpn) ||
(is_xrc_srq &&
(be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff) ==
srq->msrq.srqn)) {
cqe += cqe_inc;
if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
++nfreed;
} else if (nfreed) {
dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
dest += cqe_inc;
owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
memcpy(dest, cqe, sizeof *cqe);
dest->owner_sr_opcode = owner_bit |

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -37,38 +37,50 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_sa.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
#include <linux/rbtree.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
#ifdef CONFIG_MLX4_DEBUG
extern int mlx4_ib_debug_level;
#ifdef pr_fmt
#undef pr_fmt
#endif
#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
#define mlx4_ib_dbg(format, arg...) \
do { \
if (mlx4_ib_debug_level) \
printk(KERN_DEBUG "<" MLX4_IB_DRV_NAME "> %s: " format "\n",\
__func__, ## arg); \
} while (0)
#define mlx4_ib_warn(ibdev, format, arg...) \
dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
#else /* CONFIG_MLX4_DEBUG */
#define mlx4_ib_dbg(format, arg...) do {} while (0)
#endif /* CONFIG_MLX4_DEBUG */
#define mlx4_ib_info(ibdev, format, arg...) \
dev_info((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
enum {
MLX4_IB_SQ_MIN_WQE_SHIFT = 6
MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
MLX4_IB_MAX_HEADROOM = 2048
};
#define MLX4_IB_SQ_HEADROOM(shift) ((2048 >> (shift)) + 1)
#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
/*module param to indicate if SM assigns the alias_GUID*/
extern int mlx4_ib_sm_guid_assign;
#ifdef __linux__
extern struct proc_dir_entry *mlx4_mrs_dir_entry;
#endif
#define MLX4_IB_UC_STEER_QPN_ALIGN 1
#define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024)
#define MLX4_IB_MMAP_CMD_MASK 0xFF
#define MLX4_IB_MMAP_CMD_BITS 8
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
@ -83,15 +95,16 @@ struct mlx4_ib_pd {
};
struct mlx4_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_xrcd ibxrcd;
u32 xrcdn;
struct ib_pd *pd;
struct ib_cq *cq;
};
struct mlx4_ib_cq_buf {
struct mlx4_buf buf;
struct mlx4_mtt mtt;
int entry_size;
};
struct mlx4_ib_cq_resize {
@ -99,6 +112,11 @@ struct mlx4_ib_cq_resize {
int cqe;
};
struct mlx4_shared_mr_info {
int mr_id;
struct ib_umem *umem;
};
struct mlx4_ib_cq {
struct ib_cq ibcq;
struct mlx4_cq mcq;
@ -115,6 +133,7 @@ struct mlx4_ib_mr {
struct ib_mr ibmr;
struct mlx4_mr mmr;
struct ib_umem *umem;
struct mlx4_shared_mr_info *smr_info;
};
struct mlx4_ib_fast_reg_page_list {
@ -141,18 +160,127 @@ struct mlx4_ib_wq {
};
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = 1 << 0,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
MLX4_IB_XRC_RCV = 1 << 2,
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
struct gid_entry {
struct mlx4_ib_gid_entry {
struct list_head list;
union ib_gid gid;
int added;
u8 port;
};
enum mlx4_ib_mmap_cmd {
MLX4_IB_MMAP_UAR_PAGE = 0,
MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1,
MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2,
};
enum mlx4_ib_qp_type {
/*
* IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
* here (and in that order) since the MAD layer uses them as
* indices into a 2-entry table.
*/
MLX4_IB_QPT_SMI = IB_QPT_SMI,
MLX4_IB_QPT_GSI = IB_QPT_GSI,
MLX4_IB_QPT_RC = IB_QPT_RC,
MLX4_IB_QPT_UC = IB_QPT_UC,
MLX4_IB_QPT_UD = IB_QPT_UD,
MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
MLX4_IB_QPT_PROXY_SMI = 1 << 17,
MLX4_IB_QPT_PROXY_GSI = 1 << 18,
MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
MLX4_IB_QPT_TUN_SMI = 1 << 20,
MLX4_IB_QPT_TUN_GSI = 1 << 21,
};
#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
enum mlx4_ib_mad_ifc_flags {
MLX4_MAD_IFC_IGNORE_MKEY = 1,
MLX4_MAD_IFC_IGNORE_BKEY = 2,
MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
MLX4_MAD_IFC_IGNORE_BKEY),
MLX4_MAD_IFC_NET_VIEW = 4,
};
enum {
MLX4_NUM_TUNNEL_BUFS = 256,
};
struct mlx4_ib_tunnel_header {
struct mlx4_av av;
__be32 remote_qpn;
__be32 qkey;
__be16 vlan;
u8 mac[6];
__be16 pkey_index;
u8 reserved[6];
};
struct mlx4_ib_buf {
void *addr;
dma_addr_t map;
};
struct mlx4_rcv_tunnel_hdr {
__be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
* 0x0 - no vlan was in the packet
* 0x01 - C-VLAN was in the packet */
u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
u8 reserved;
__be16 pkey_index;
__be16 sl_vid;
__be16 slid_mac_47_32;
__be32 mac_31_0;
};
struct mlx4_ib_proxy_sqp_hdr {
struct ib_grh grh;
struct mlx4_rcv_tunnel_hdr tun;
} __packed;
struct mlx4_roce_smac_vlan_info {
u64 smac;
int smac_index;
int smac_port;
u64 candidate_smac;
int candidate_smac_index;
int candidate_smac_port;
u16 vid;
int vlan_index;
int vlan_port;
u16 candidate_vid;
int candidate_vlan_index;
int candidate_vlan_port;
int update_vid;
};
struct mlx4_ib_qpg_data {
unsigned long *tss_bitmap;
unsigned long *rss_bitmap;
struct mlx4_ib_qp *qpg_parent;
int tss_qpn_base;
int rss_qpn_base;
u32 tss_child_count;
u32 rss_child_count;
u32 qpg_tss_mask_sz;
};
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
@ -168,14 +296,13 @@ struct mlx4_ib_qp {
int sq_spare_wqes;
struct mlx4_ib_wq sq;
enum mlx4_ib_qp_type mlx4_ib_qp_type;
struct ib_umem *umem;
struct mlx4_mtt mtt;
int buf_size;
struct mutex mutex;
u32 flags;
struct list_head xrc_reg_list;
spinlock_t xrc_reg_list_lock;
u16 xrcdn;
u32 flags;
u8 port;
u8 alt_port;
u8 atomic_rd_en;
@ -183,9 +310,16 @@ struct mlx4_ib_qp {
u8 sq_no_prefetch;
u8 state;
int mlx_type;
enum ib_qpg_type qpg_type;
struct mlx4_ib_qpg_data *qpg_data;
struct list_head gid_list;
int max_inline_data;
struct mlx4_bf bf;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
struct mlx4_roce_smac_vlan_info pri;
struct mlx4_roce_smac_vlan_info alt;
struct list_head rules_list;
int max_inline_data;
struct mlx4_bf bf;
};
struct mlx4_ib_srq {
@ -208,6 +342,138 @@ struct mlx4_ib_ah {
union mlx4_ext_av av;
};
/****************************************/
/* alias guid support */
/****************************************/
#define NUM_PORT_ALIAS_GUID 2
#define NUM_ALIAS_GUID_IN_REC 8
#define NUM_ALIAS_GUID_REC_IN_PORT 16
#define GUID_REC_SIZE 8
#define NUM_ALIAS_GUID_PER_PORT 128
#define MLX4_NOT_SET_GUID (0x00LL)
#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
MLX4_GUID_INFO_STATUS_PENDING,
};
enum mlx4_guid_alias_rec_ownership {
MLX4_GUID_DRIVER_ASSIGN,
MLX4_GUID_SYSADMIN_ASSIGN,
MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
};
enum mlx4_guid_alias_rec_method {
MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
};
struct mlx4_sriov_alias_guid_info_rec_det {
u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
u8 method; /*set or delete*/
enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
};
struct mlx4_sriov_alias_guid_port_rec_det {
struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
u8 port;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
};
struct mlx4_sriov_alias_guid {
struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
spinlock_t ag_work_lock;
struct ib_sa_client *sa_client;
};
struct mlx4_ib_demux_work {
struct work_struct work;
struct mlx4_ib_dev *dev;
int slave;
int do_init;
u8 port;
};
struct mlx4_ib_tun_tx_buf {
struct mlx4_ib_buf buf;
struct ib_ah *ah;
};
struct mlx4_ib_demux_pv_qp {
struct ib_qp *qp;
enum ib_qp_type proxy_qpt;
struct mlx4_ib_buf *ring;
struct mlx4_ib_tun_tx_buf *tx_ring;
spinlock_t tx_lock;
unsigned tx_ix_head;
unsigned tx_ix_tail;
};
enum mlx4_ib_demux_pv_state {
DEMUX_PV_STATE_DOWN,
DEMUX_PV_STATE_STARTING,
DEMUX_PV_STATE_ACTIVE,
DEMUX_PV_STATE_DOWNING,
};
struct mlx4_ib_demux_pv_ctx {
int port;
int slave;
enum mlx4_ib_demux_pv_state state;
int has_smi;
struct ib_device *ib_dev;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_mr *mr;
struct work_struct work;
struct workqueue_struct *wq;
struct mlx4_ib_demux_pv_qp qp[2];
};
struct mlx4_ib_demux_ctx {
struct ib_device *ib_dev;
int port;
struct workqueue_struct *wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
__be64 subnet_prefix;
__be64 guid_cache[128];
struct mlx4_ib_dev *dev;
/* the following lock protects both mcg_table and mcg_mgid0_list */
struct mutex mcg_table_lock;
struct rb_root mcg_table;
struct list_head mcg_mgid0_list;
struct workqueue_struct *mcg_wq;
struct mlx4_ib_demux_pv_ctx **tun;
atomic_t tid;
int flushing; /* flushing the work queue */
};
struct mlx4_ib_sriov {
struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
/* when using this spinlock you should use "irq" because
* it may be called from interrupt context.*/
spinlock_t going_down_lock;
int is_going_down;
struct mlx4_sriov_alias_guid alias_guid;
/* CM paravirtualization fields */
struct list_head cm_list;
spinlock_t id_map_lock;
struct rb_root sl_id_map;
struct idr pv_id_table;
};
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
@ -215,6 +481,42 @@ struct mlx4_ib_iboe {
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
struct pkey_mgt {
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
struct list_head pkey_port_list[MLX4_MFUNC_MAX];
struct kobject *device_parent[MLX4_MFUNC_MAX];
};
struct mlx4_ib_iov_sysfs_attr {
void *ctx;
struct kobject *kobj;
unsigned long data;
u32 entry_num;
char name[15];
struct device_attribute dentry;
struct device *dev;
};
struct mlx4_ib_iov_sysfs_attr_ar {
struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
};
struct mlx4_ib_iov_port {
char name[100];
u8 num;
struct mlx4_ib_dev *dev;
struct list_head list;
struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
struct ib_port_attr attr;
struct kobject *cur_port;
struct kobject *admin_alias_parent;
struct kobject *gids_parent;
struct kobject *pkeys_parent;
struct kobject *mcgs_parent;
struct mlx4_ib_iov_sysfs_attr mcg_dentry;
};
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@ -226,12 +528,35 @@ struct mlx4_ib_dev {
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
spinlock_t sm_lock;
struct mlx4_ib_sriov sriov;
struct mutex cap_mask_mutex;
struct mutex xrc_reg_mutex;
int ib_active;
bool ib_active;
struct mlx4_ib_iboe iboe;
int counters[MLX4_MAX_PORTS];
int *eq_table;
int eq_added;
struct kobject *iov_parent;
struct kobject *ports_parent;
struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
struct pkey_mgt pkeys;
unsigned long *ib_uc_qpns_bitmap;
int steer_qpn_count;
int steer_qpn_base;
};
struct ib_event_work {
struct work_struct work;
struct mlx4_ib_dev *ib_dev;
struct mlx4_eqe ib_eqe;
};
struct mlx4_ib_qp_tunnel_init_attr {
struct ib_qp_init_attr init_attr;
int slave;
enum ib_qp_type proxy_qp_type;
u8 port;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@ -303,6 +628,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@ -310,9 +638,12 @@ void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db)
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 start_va,
int *num_of_mtts);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
struct ib_udata *udata, int mr_id);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
@ -322,6 +653,7 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
struct ib_ucontext *context,
struct ib_udata *udata);
@ -338,11 +670,6 @@ int mlx4_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
struct ib_cq *xrc_cq,
struct ib_xrcd *xrcd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
@ -364,7 +691,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@ -379,20 +706,20 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
u32 *qp_num);
int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
struct ib_qp_attr *attr, int attr_mask);
int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
struct ib_qp_attr *attr, int attr_mask,
struct ib_qp_init_attr *init_attr);
int mlx4_ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
int mlx4_ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view);
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey, int netw_view);
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view);
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port);
int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
union mlx4_counter *counter, u8 clear);
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
@ -403,7 +730,73 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
return !!(ah->av.ib.g_slid & 0x80);
}
int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
int mlx4_ib_mcg_init(void);
void mlx4_ib_mcg_destroy(void);
int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *sa_mad);
int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *mad);
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
enum ib_event_type type);
void mlx4_ib_tunnels_update_work(struct work_struct *work);
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad, int is_eth);
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad);
void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
/* alias guid support */
void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
int block_num,
u8 port_num, u8 *p_data);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
int block_num, u8 port_num,
u8 *p_data);
int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
__be64 mlx4_ib_gen_node_guid(void);
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
#endif /* MLX4_IB_H */

View File

@ -31,6 +31,15 @@
* SOFTWARE.
*/
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sched.h>
#ifdef __linux__
#include <linux/proc_fs.h>
#include <linux/cred.h>
#endif
#include "mlx4_ib.h"
static u32 convert_access(int acc)
@ -41,13 +50,67 @@ static u32 convert_access(int acc)
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
MLX4_PERM_LOCAL_READ;
}
#ifdef __linux__
static ssize_t shared_mr_proc_read(struct file *file,
char __user *buffer,
size_t len,
loff_t *offset)
{
return -ENOSYS;
}
static ssize_t shared_mr_proc_write(struct file *file,
const char __user *buffer,
size_t len,
loff_t *offset)
{
return -ENOSYS;
}
static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
{
struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
struct mlx4_shared_mr_info *smr_info =
(struct mlx4_shared_mr_info *)pde->data;
/* Prevent any mapping not on start of area */
if (vma->vm_pgoff != 0)
return -EINVAL;
return ib_umem_map_to_vma(smr_info->umem,
vma);
}
static const struct file_operations shared_mr_proc_ops = {
.owner = THIS_MODULE,
.read = shared_mr_proc_read,
.write = shared_mr_proc_write,
.mmap = shared_mr_mmap
};
static mode_t convert_shared_access(int acc)
{
return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) |
(acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) |
(acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) |
(acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
(acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) |
(acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
}
#endif
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx4_ib_mr *mr;
int err;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@ -74,118 +137,350 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(err);
}
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
struct mlx4_mtt *mtt,
u64 mtt_size,
u64 mtt_shift,
u64 len,
u64 cur_start_addr,
u64 *pages,
int *start_index,
int *npages)
{
int k;
int err = 0;
u64 mtt_entries;
u64 cur_end_addr = cur_start_addr + len;
u64 cur_end_addr_aligned = 0;
len += (cur_start_addr & (mtt_size-1ULL));
cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
len += (cur_end_addr_aligned - cur_end_addr);
if (len & (mtt_size-1ULL)) {
WARN(1 ,
"write_block: len %llx is not aligned to mtt_size %llx\n",
len, mtt_size);
return -EINVAL;
}
mtt_entries = (len >> mtt_shift);
/* Align the MTT start address to
the mtt_size.
Required to handle cases when the MR
starts in the middle of an MTT record.
Was not required in old code since
the physical addresses provided by
the dma subsystem were page aligned,
which was also the MTT size.
*/
cur_start_addr = round_down(cur_start_addr, mtt_size);
/* A new block is started ...*/
for (k = 0; k < mtt_entries; ++k) {
pages[*npages] = cur_start_addr + (mtt_size * k);
(*npages)++;
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
*/
if (*npages == PAGE_SIZE / sizeof(u64)) {
err = mlx4_write_mtt(dev->dev,
mtt, *start_index,
*npages, pages);
if (err)
return err;
(*start_index) += *npages;
*npages = 0;
}
}
return 0;
}
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
struct ib_umem_chunk *chunk;
int i, j, k;
int n;
int len;
int j;
u64 len = 0;
int err = 0;
u64 mtt_size;
u64 cur_start_addr = 0;
u64 mtt_shift;
int start_index = 0;
int npages = 0;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
i = n = 0;
mtt_shift = mtt->page_shift;
mtt_size = 1ULL << mtt_shift;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (j = 0; j < chunk->nmap; ++j) {
len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(&chunk->page_list[j]) +
umem->page_size * k;
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
*/
if (i == PAGE_SIZE / sizeof (u64)) {
err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages);
if (err)
goto out;
n += i;
i = 0;
}
if (cur_start_addr + len ==
sg_dma_address(&chunk->page_list[j])) {
/* still the same block */
len += sg_dma_len(&chunk->page_list[j]);
continue;
}
/* A new block is started ...*/
/* If len is malaligned, write an extra mtt entry to
cover the misaligned area (round up the division)
*/
err = mlx4_ib_umem_write_mtt_block(dev,
mtt, mtt_size, mtt_shift,
len, cur_start_addr,
pages,
&start_index,
&npages);
if (err)
goto out;
cur_start_addr =
sg_dma_address(&chunk->page_list[j]);
len = sg_dma_len(&chunk->page_list[j]);
}
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
/* Handle the last block */
if (len > 0) {
/* If len is malaligned, write an extra mtt entry to cover
the misaligned area (round up the division)
*/
err = mlx4_ib_umem_write_mtt_block(dev,
mtt, mtt_size, mtt_shift,
len, cur_start_addr,
pages,
&start_index,
&npages);
if (err)
goto out;
}
if (npages)
err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
out:
free_page((unsigned long) pages);
return err;
}
static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr,
u64 start, u64 virt_addr, int access_flags)
static inline u64 alignment_of(u64 ptr)
{
return ilog2(ptr & (~(ptr-1)));
}
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
u64 current_block_end,
u64 block_shift)
{
/* Check whether the alignment of the new block
is aligned as well as the previous block.
Block address must start with zeros till size of entity_size.
*/
if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
/* It is not as well aligned as the
previous block-reduce the mtt size
accordingly.
Here we take the last right bit
which is 1.
*/
block_shift = alignment_of(next_block_start);
/* Check whether the alignment of the
end of previous block - is it aligned
as well as the start of the block
*/
if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
/* It is not as well aligned as
the start of the block - reduce the
mtt size accordingly.
*/
block_shift = alignment_of(current_block_end);
return block_shift;
}
/* Calculate optimal mtt size based on contiguous pages.
* Function will return also the number of pages that are not aligned to the
calculated mtt_size to be added to total number
of pages. For that we should check the first chunk length & last chunk
length and if not aligned to mtt_size we should increment
the non_aligned_pages number.
All chunks in the middle already handled as part of mtt shift calculation
for both their start & end addresses.
*/
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 start_va,
int *num_of_mtts)
{
#if defined(CONFIG_HUGETLB_PAGE) && !defined(__powerpc__) && !defined(__ia64__)
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
unsigned dsize;
dma_addr_t daddr;
unsigned cur_size = 0;
dma_addr_t uninitialized_var(cur_addr);
int n;
struct ib_umem *umem = mr->umem;
u64 *arr;
int err = 0;
int i;
int j = 0;
int off = start & (HPAGE_SIZE - 1);
int j;
u64 block_shift = MLX4_MAX_MTT_SHIFT;
u64 current_block_len = 0;
u64 current_block_start = 0;
u64 misalignment_bits;
u64 first_block_start = 0;
u64 last_block_end = 0;
u64 total_len = 0;
u64 last_block_aligned_end = 0;
u64 min_shift = ilog2(umem->page_size);
n = DIV_ROUND_UP(off + umem->length, HPAGE_SIZE);
arr = kmalloc(n * sizeof *arr, GFP_KERNEL);
if (!arr)
return -ENOMEM;
list_for_each_entry(chunk, &umem->chunk_list, list)
for (i = 0; i < chunk->nmap; ++i) {
daddr = sg_dma_address(&chunk->page_list[i]);
dsize = sg_dma_len(&chunk->page_list[i]);
if (!cur_size) {
cur_addr = daddr;
cur_size = dsize;
} else if (cur_addr + cur_size != daddr) {
err = -EINVAL;
goto out;
} else
cur_size += dsize;
if (cur_size > HPAGE_SIZE) {
err = -EINVAL;
goto out;
} else if (cur_size == HPAGE_SIZE) {
cur_size = 0;
arr[j++] = cur_addr;
}
list_for_each_entry(chunk, &umem->chunk_list, list) {
/* Initialization - save the first chunk start as
the current_block_start - block means contiguous pages.
*/
if (current_block_len == 0 && current_block_start == 0) {
first_block_start = current_block_start =
sg_dma_address(&chunk->page_list[0]);
/* Find the bits that are different between
the physical address and the virtual
address for the start of the MR.
*/
/* umem_get aligned the start_va to a page
boundry. Therefore, we need to align the
start va to the same boundry */
/* misalignment_bits is needed to handle the
case of a single memory region. In this
case, the rest of the logic will not reduce
the block size. If we use a block size
which is bigger than the alignment of the
misalignment bits, we might use the virtual
page number instead of the physical page
number, resulting in access to the wrong
data. */
misalignment_bits =
(start_va & (~(((u64)(umem->page_size))-1ULL)))
^ current_block_start;
block_shift = min(alignment_of(misalignment_bits)
, block_shift);
}
if (cur_size) {
arr[j++] = cur_addr;
/* Go over the scatter entries in the current chunk, check
if they continue the previous scatter entry.
*/
for (j = 0; j < chunk->nmap; ++j) {
u64 next_block_start =
sg_dma_address(&chunk->page_list[j]);
u64 current_block_end = current_block_start
+ current_block_len;
/* If we have a split (non-contig.) between two block*/
if (current_block_end != next_block_start) {
block_shift = mlx4_ib_umem_calc_block_mtt(
next_block_start,
current_block_end,
block_shift);
/* If we reached the minimum shift for 4k
page we stop the loop.
*/
if (block_shift <= min_shift)
goto end;
/* If not saved yet we are in first block -
we save the length of first block to
calculate the non_aligned_pages number at
* the end.
*/
total_len += current_block_len;
/* Start a new block */
current_block_start = next_block_start;
current_block_len =
sg_dma_len(&chunk->page_list[j]);
continue;
}
/* The scatter entry is another part of
the current block, increase the block size
* An entry in the scatter can be larger than
4k (page) as of dma mapping
which merge some blocks together.
*/
current_block_len +=
sg_dma_len(&chunk->page_list[j]);
}
}
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length,
convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr);
if (err)
goto out;
/* Account for the last block in the total len */
total_len += current_block_len;
/* Add to the first block the misalignment that it suffers from.*/
total_len += (first_block_start & ((1ULL<<block_shift)-1ULL));
last_block_end = current_block_start+current_block_len;
last_block_aligned_end = round_up(last_block_end, 1<<block_shift);
total_len += (last_block_aligned_end - last_block_end);
err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr);
WARN((total_len & ((1ULL<<block_shift)-1ULL)),
" misaligned total length detected (%llu, %llu)!",
total_len, block_shift);
out:
kfree(arr);
return err;
#else
return -ENOSYS;
#endif
*num_of_mtts = total_len >> block_shift;
end:
if (block_shift < min_shift) {
/* If shift is less than the min we set a WARN and
return the min shift.
*/
WARN(1,
"mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
block_shift);
block_shift = min_shift;
}
return block_shift;
}
#ifdef __linux__
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
struct proc_dir_entry *mr_proc_entry;
mode_t mode = S_IFREG;
char name_buff[16];
mode |= convert_shared_access(access_flags);
sprintf(name_buff, "%X", mr_id);
mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
mr->smr_info->mr_id = mr_id;
mr->smr_info->umem = mr->umem;
mr_proc_entry = proc_create_data(name_buff, mode,
mlx4_mrs_dir_entry,
&shared_mr_proc_ops,
mr->smr_info);
if (!mr_proc_entry) {
pr_err("prepare_shared_mr failed via proc\n");
kfree(mr->smr_info);
return -ENODEV;
}
current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
mr_proc_entry->size = mr->umem->length;
return 0;
}
static int is_shared_mr(int access_flags)
{
/* We should check whether IB_ACCESS_SHARED_MR_USER_READ or
other shared bits were turned on.
*/
return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
IB_ACCESS_SHARED_MR_USER_WRITE |
IB_ACCESS_SHARED_MR_GROUP_READ |
IB_ACCESS_SHARED_MR_GROUP_WRITE |
IB_ACCESS_SHARED_MR_OTHER_READ |
IB_ACCESS_SHARED_MR_OTHER_WRITE));
}
#endif
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
struct ib_udata *udata,
int mr_id)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
@ -193,38 +488,49 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
int n;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get(pd->uobject->context, start, length,
access_flags, 0);
access_flags, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
if (!mr->umem->hugetlb ||
handle_hugetlb_user_mr(pd, mr, start, virt_addr, access_flags)) {
n = ib_umem_page_count(mr->umem);
shift = ilog2(mr->umem->page_size);
n = ib_umem_page_count(mr->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
&n);
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
if (err)
goto err_umem;
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
if (err)
goto err_umem;
err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
if (err)
goto err_mr;
}
err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
if (err)
goto err_mr;
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
#ifdef __linux__
/* Check whether MR should be shared */
if (is_shared_mr(access_flags)) {
/* start address and length must be aligned to page size in order
to map a full page and preventing leakage of data */
if (mr->umem->offset || (length & ~PAGE_MASK)) {
err = -EINVAL;
goto err_mr;
}
err = prepare_shared_mr(mr, access_flags, mr_id);
if (err)
goto err_mr;
}
#endif
return &mr->ibmr;
err_mr:
@ -239,13 +545,36 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (mr->smr_info) {
/* When master/parent shared mr is dereged there is
no ability to share this mr any more - its mr_id will be
returned to the kernel as part of ib_uverbs_dereg_mr
and may be allocated again as part of other reg_mr.
*/
char name_buff[16];
sprintf(name_buff, "%X", mr->smr_info->mr_id);
/* Remove proc entry is checking internally that no operation
was strated on that proc fs file and if in the middle
current process will wait till end of operation.
That's why no sync mechanism is needed when we release
below the shared umem.
*/
#ifdef __linux__
remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
kfree(mr->smr_info);
#endif
}
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
return 0;
@ -258,7 +587,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
struct mlx4_ib_mr *mr;
int err;
mr = kmalloc(sizeof *mr, GFP_KERNEL);
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@ -291,7 +620,7 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device
struct mlx4_ib_fast_reg_page_list *mfrpl;
int size = page_list_len * sizeof (u64);
if (page_list_len > MAX_FAST_REG_PAGES)
if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
@ -403,7 +732,7 @@ int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
err = mlx4_SYNC_TPT(mdev);
if (err)
printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
pr_warn("SYNC_TPT error %d when "
"unmapping FMRs\n", err);
return 0;

File diff suppressed because it is too large Load Diff

View File

@ -33,6 +33,7 @@
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
@ -58,7 +59,7 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
event.event = IB_EVENT_SRQ_ERR;
break;
default:
printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
pr_warn("Unexpected event type %d "
"on SRQ %06x\n", type, srq->srqn);
return;
}
@ -67,17 +68,16 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
}
}
struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
struct ib_cq *xrc_cq,
struct ib_xrcd *xrcd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_srq *srq;
struct mlx4_wqe_srq_next_seg *next;
u32 cqn;
u16 xrcdn;
struct mlx4_wqe_data_seg *scatter;
u32 cqn;
u16 xrcdn;
int desc_size;
int buf_size;
int err;
@ -85,14 +85,10 @@ struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) {
mlx4_ib_dbg("a size param is out of range. "
"max_wr = 0x%x, max_sge = 0x%x",
init_attr->attr.max_wr, init_attr->attr.max_sge);
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
return ERR_PTR(-EINVAL);
}
srq = kzalloc(sizeof *srq, GFP_KERNEL);
srq = kmalloc(sizeof *srq, GFP_KERNEL);
if (!srq)
return ERR_PTR(-ENOMEM);
@ -138,8 +134,6 @@ struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
struct mlx4_wqe_data_seg *scatter;
err = mlx4_db_alloc(dev->dev, &srq->db, 0);
if (err)
goto err_srq;
@ -182,24 +176,24 @@ struct ib_srq *mlx4_ib_create_xrc_srq(struct ib_pd *pd,
}
}
cqn = xrc_cq ? (u32) (to_mcq(xrc_cq)->mcq.cqn) : 0;
xrcdn = xrcd ? (u16) (to_mxrcd(xrcd)->xrcdn) :
cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
(u16) dev->dev->caps.reserved_xrcds;
err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
srq->db.dma, &srq->msrq);
if (err)
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
if (pd->uobject) {
if (pd->uobject)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
}
} else
srq->ibsrq.xrc_srq_num = srq->msrq.srqn;
init_attr->attr.max_wr = srq->msrq.max - 1;
@ -238,16 +232,12 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
int ret;
/* We don't support resizing SRQs (yet?) */
if (attr_mask & IB_SRQ_MAX_WR) {
mlx4_ib_dbg("resize not yet supported");
if (attr_mask & IB_SRQ_MAX_WR)
return -EINVAL;
}
if (attr_mask & IB_SRQ_LIMIT) {
if (attr->srq_limit >= srq->msrq.max){
mlx4_ib_dbg("limit (0x%x) too high", attr->srq_limit);
if (attr->srq_limit >= srq->msrq.max)
return -EINVAL;
}
mutex_lock(&srq->mutex);
ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
@ -260,13 +250,6 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return 0;
}
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
{
return mlx4_ib_create_xrc_srq(pd, NULL, NULL, init_attr, udata);
}
int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
@ -289,18 +272,6 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
struct mlx4_ib_cq *cq;
mlx4_srq_invalidate(dev->dev, &msrq->msrq);
if (srq->xrc_cq && !srq->uobject) {
cq = to_mcq(srq->xrc_cq);
spin_lock_irq(&cq->lock);
__mlx4_ib_cq_clean(cq, -1, msrq);
mlx4_srq_remove(dev->dev, &msrq->msrq);
spin_unlock_irq(&cq->lock);
} else
mlx4_srq_remove(dev->dev, &msrq->msrq);
mlx4_srq_free(dev->dev, &msrq->msrq);
mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
@ -349,16 +320,12 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
mlx4_ib_dbg("srq num 0x%x: num s/g entries too large (%d)",
srq->msrq.srqn, wr->num_sge);
err = -EINVAL;
*bad_wr = wr;
break;
}
if (unlikely(srq->head == srq->tail)) {
mlx4_ib_dbg("srq num 0x%x: No entries available to post.",
srq->msrq.srqn);
err = -ENOMEM;
*bad_wr = wr;
break;

View File

@ -0,0 +1,800 @@
/*
* Copyright (c) 2012 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*#include "core_priv.h"*/
#include "mlx4_ib.h"
#include <linux/slab.h>
#include <linux/string.h>
#include <rdma/ib_mad.h>
/*show_admin_alias_guid returns the administratively assigned value of that GUID.
* Values returned in buf parameter string:
* 0 - requests opensm to assign a value.
* ffffffffffffffff - delete this entry.
* other - value assigned by administrator.
*/
static ssize_t show_admin_alias_guid(struct device *dev,
struct device_attribute *attr, char *buf)
{
int record_num;/*0-15*/
int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
return sprintf(buf, "%llx\n", (long long)
be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[8 * guid_index_in_rec]));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
* Values in buf parameter string:
* 0 - requests opensm to assign a value.
* 0xffffffffffffffff - delete this entry.
* other - guid value assigned by the administrator.
*/
static ssize_t store_admin_alias_guid(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int record_num;/*0-15*/
int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
u64 sysadmin_ag_val;
record_num = mlx4_ib_iov_dentry->entry_num / 8;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
if (0 == record_num && 0 == guid_index_in_rec) {
pr_err("GUID 0 block 0 is RO\n");
return count;
}
sscanf(buf, "%llx", &sysadmin_ag_val);
*(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[GUID_REC_SIZE * guid_index_in_rec] =
cpu_to_be64(sysadmin_ag_val);
/* Change the state to be pending for update */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE ;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_SET;
switch (sysadmin_ag_val) {
case MLX4_GUID_FOR_DELETE_VAL:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_DELETE;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
/* The sysadmin requests the SM to re-assign */
case MLX4_NOT_SET_GUID:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_DRIVER_ASSIGN;
break;
/* The sysadmin requests a specific value.*/
default:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
}
/* set the record index */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
return count;
}
static ssize_t show_port_gid(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
union ib_gid gid;
ssize_t ret;
ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
mlx4_ib_iov_dentry->entry_num, &gid, 1);
if (ret)
return ret;
ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) gid.raw)[0]),
be16_to_cpu(((__be16 *) gid.raw)[1]),
be16_to_cpu(((__be16 *) gid.raw)[2]),
be16_to_cpu(((__be16 *) gid.raw)[3]),
be16_to_cpu(((__be16 *) gid.raw)[4]),
be16_to_cpu(((__be16 *) gid.raw)[5]),
be16_to_cpu(((__be16 *) gid.raw)[6]),
be16_to_cpu(((__be16 *) gid.raw)[7]));
return ret;
}
static ssize_t show_phys_port_pkey(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
u16 pkey;
ssize_t ret;
ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
mlx4_ib_iov_dentry->entry_num, &pkey, 1);
if (ret)
return ret;
return sprintf(buf, "0x%04x\n", pkey);
}
#define DENTRY_REMOVE(_dentry) \
do { \
sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \
} while (0);
static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
char *_name, struct kobject *_kobj,
ssize_t (*show)(struct device *dev,
struct device_attribute *attr,
char *buf),
ssize_t (*store)(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
)
{
int ret = 0;
struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
vdentry->ctx = _ctx;
vdentry->dentry.show = show;
vdentry->dentry.store = store;
sysfs_attr_init(&vdentry->dentry.attr);
vdentry->dentry.attr.name = vdentry->name;
vdentry->dentry.attr.mode = 0;
vdentry->kobj = _kobj;
snprintf(vdentry->name, 15, "%s", _name);
if (vdentry->dentry.store)
vdentry->dentry.attr.mode |= S_IWUSR;
if (vdentry->dentry.show)
vdentry->dentry.attr.mode |= S_IRUGO;
ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
if (ret) {
pr_err("failed to create %s\n", vdentry->dentry.attr.name);
vdentry->ctx = NULL;
return ret;
}
return ret;
}
int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr)
{
struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
int ret;
ret = sysfs_create_file(port->mcgs_parent, attr);
if (ret)
pr_err("failed to create %s\n", attr->name);
return ret;
}
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr)
{
struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
sysfs_remove_file(port->mcgs_parent, attr);
}
static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
{
int i;
char buff[10];
struct mlx4_ib_iov_port *port = NULL;
int ret = 0 ;
struct ib_port_attr attr;
/* get the physical gid and pkey table sizes.*/
ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
if (ret)
goto err;
port = &device->iov_ports[port_num - 1];
port->dev = device;
port->num = port_num;
/* Directory structure:
* iov -
* port num -
* admin_guids
* gids (operational)
* mcg_table
*/
port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
GFP_KERNEL);
if (!port->dentr_ar) {
ret = -ENOMEM;
goto err;
}
sprintf(buff, "%d", port_num);
port->cur_port = kobject_create_and_add(buff,
kobject_get(device->ports_parent));
if (!port->cur_port) {
ret = -ENOMEM;
goto kobj_create_err;
}
/* admin GUIDs */
port->admin_alias_parent = kobject_create_and_add("admin_guids",
kobject_get(port->cur_port));
if (!port->admin_alias_parent) {
ret = -ENOMEM;
goto err_admin_guids;
}
for (i = 0 ; i < attr.gid_tbl_len; i++) {
sprintf(buff, "%d", i);
port->dentr_ar->dentries[i].entry_num = i;
ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
buff, port->admin_alias_parent,
show_admin_alias_guid, store_admin_alias_guid);
if (ret)
goto err_admin_alias_parent;
}
/* gids subdirectory (operational gids) */
port->gids_parent = kobject_create_and_add("gids",
kobject_get(port->cur_port));
if (!port->gids_parent) {
ret = -ENOMEM;
goto err_gids;
}
for (i = 0 ; i < attr.gid_tbl_len; i++) {
sprintf(buff, "%d", i);
port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
ret = create_sysfs_entry(port,
&port->dentr_ar->dentries[attr.gid_tbl_len + i],
buff,
port->gids_parent, show_port_gid, NULL);
if (ret)
goto err_gids_parent;
}
/* physical port pkey table */
port->pkeys_parent =
kobject_create_and_add("pkeys", kobject_get(port->cur_port));
if (!port->pkeys_parent) {
ret = -ENOMEM;
goto err_pkeys;
}
for (i = 0 ; i < attr.pkey_tbl_len; i++) {
sprintf(buff, "%d", i);
port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
ret = create_sysfs_entry(port,
&port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
buff, port->pkeys_parent,
show_phys_port_pkey, NULL);
if (ret)
goto err_pkeys_parent;
}
/* MCGs table */
port->mcgs_parent =
kobject_create_and_add("mcgs", kobject_get(port->cur_port));
if (!port->mcgs_parent) {
ret = -ENOMEM;
goto err_mcgs;
}
return 0;
err_mcgs:
kobject_put(port->cur_port);
err_pkeys_parent:
kobject_put(port->pkeys_parent);
err_pkeys:
kobject_put(port->cur_port);
err_gids_parent:
kobject_put(port->gids_parent);
err_gids:
kobject_put(port->cur_port);
err_admin_alias_parent:
kobject_put(port->admin_alias_parent);
err_admin_guids:
kobject_put(port->cur_port);
kobject_put(port->cur_port); /* once more for create_and_add buff */
kobj_create_err:
kobject_put(device->ports_parent);
kfree(port->dentr_ar);
err:
pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
port_num, ret);
return ret;
}
static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
{
char base_name[9];
/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
strlcpy(name, pci_name(dev->dev->pdev), max);
strncpy(base_name, name, 8); /*till xxxx:yy:*/
base_name[8] = '\0';
/* with no ARI only 3 last bits are used so when the fn is higher than 8
* need to add it to the dev num, so count in the last number will be
* modulo 8 */
sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
}
struct mlx4_port {
struct kobject kobj;
struct mlx4_ib_dev *dev;
struct attribute_group pkey_group;
struct attribute_group gid_group;
u8 port_num;
int slave;
};
static void mlx4_port_release(struct kobject *kobj)
{
struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
struct attribute *a;
int i;
for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
kfree(a);
kfree(p->pkey_group.attrs);
for (i = 0; (a = p->gid_group.attrs[i]); ++i)
kfree(a);
kfree(p->gid_group.attrs);
kfree(p);
}
struct port_attribute {
struct attribute attr;
ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
const char *buf, size_t count);
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct port_attribute *port_attr =
container_of(attr, struct port_attribute, attr);
struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
if (!port_attr->show)
return -EIO;
return port_attr->show(p, port_attr, buf);
}
static ssize_t port_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t size)
{
struct port_attribute *port_attr =
container_of(attr, struct port_attribute, attr);
struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
if (!port_attr->store)
return -EIO;
return port_attr->store(p, port_attr, buf, size);
}
static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show,
.store = port_attr_store,
};
static struct kobj_type port_type = {
.release = mlx4_port_release,
.sysfs_ops = &port_sysfs_ops,
};
struct port_table_attribute {
struct port_attribute attr;
char name[8];
int index;
};
static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
ssize_t ret = -ENODEV;
if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
(p->dev->dev->caps.pkey_table_len[p->port_num]))
ret = sprintf(buf, "none\n");
else
ret = sprintf(buf, "%d\n",
p->dev->pkeys.virt2phys_pkey[p->slave]
[p->port_num - 1][tab_attr->index]);
return ret;
}
static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
const char *buf, size_t count)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int idx;
int err;
/* do not allow remapping Dom0 virtual pkey table */
if (p->slave == mlx4_master_func_num(p->dev->dev))
return -EINVAL;
if (!strncasecmp(buf, "no", 2))
idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
else if (sscanf(buf, "%i", &idx) != 1 ||
idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
idx < 0)
return -EINVAL;
p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
[tab_attr->index] = idx;
mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
tab_attr->index, idx);
err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
if (err) {
pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
" port %d, index %d\n", p->slave, p->port_num, idx);
return err;
}
return count;
}
static ssize_t show_port_gid_idx(struct mlx4_port *p,
struct port_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", p->slave);
}
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
struct port_attribute *, char *buf),
ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
const char *buf, size_t count),
int len)
{
struct attribute **tab_attr;
struct port_table_attribute *element;
int i;
tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
if (!tab_attr)
return NULL;
for (i = 0; i < len; i++) {
element = kzalloc(sizeof (struct port_table_attribute),
GFP_KERNEL);
if (!element)
goto err;
if (snprintf(element->name, sizeof (element->name),
"%d", i) >= sizeof (element->name)) {
kfree(element);
goto err;
}
sysfs_attr_init(&element->attr.attr);
element->attr.attr.name = element->name;
if (store) {
element->attr.attr.mode = S_IWUSR | S_IRUGO;
element->attr.store = store;
} else
element->attr.attr.mode = S_IRUGO;
element->attr.show = show;
element->index = i;
tab_attr[i] = &element->attr.attr;
}
return tab_attr;
err:
while (--i >= 0)
kfree(tab_attr[i]);
kfree(tab_attr);
return NULL;
}
static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
{
struct mlx4_port *p;
int i;
int ret;
int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port_num) ==
IB_LINK_LAYER_ETHERNET;
p = kzalloc(sizeof *p, GFP_KERNEL);
if (!p)
return -ENOMEM;
p->dev = dev;
p->port_num = port_num;
p->slave = slave;
ret = kobject_init_and_add(&p->kobj, &port_type,
kobject_get(dev->dev_ports_parent[slave]),
"%d", port_num);
if (ret)
goto err_alloc;
p->pkey_group.name = "pkey_idx";
if (is_eth)
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, NULL,
dev->dev->caps.pkey_table_len[port_num]);
else
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
if (!p->pkey_group.attrs)
goto err_alloc;
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
goto err_free_pkey;
p->gid_group.name = "gid_idx";
p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
if (!p->gid_group.attrs)
goto err_free_pkey;
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
return 0;
err_free_gid:
kfree(p->gid_group.attrs[0]);
kfree(p->gid_group.attrs);
err_free_pkey:
for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
err_alloc:
kobject_put(dev->dev_ports_parent[slave]);
kfree(p);
return ret;
}
static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
{
char name[32];
int err;
int port;
struct kobject *p, *t;
struct mlx4_port *mport;
get_name(dev, name, slave, sizeof name);
dev->pkeys.device_parent[slave] =
kobject_create_and_add(name, kobject_get(dev->iov_parent));
if (!dev->pkeys.device_parent[slave]) {
err = -ENOMEM;
goto fail_dev;
}
INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
dev->dev_ports_parent[slave] =
kobject_create_and_add("ports",
kobject_get(dev->pkeys.device_parent[slave]));
if (!dev->dev_ports_parent[slave]) {
err = -ENOMEM;
goto err_ports;
}
for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
err = add_port(dev, port, slave);
if (err)
goto err_add;
}
return 0;
err_add:
list_for_each_entry_safe(p, t,
&dev->pkeys.pkey_port_list[slave],
entry) {
list_del(&p->entry);
mport = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &mport->pkey_group);
sysfs_remove_group(p, &mport->gid_group);
kobject_put(p);
}
kobject_put(dev->dev_ports_parent[slave]);
err_ports:
kobject_put(dev->pkeys.device_parent[slave]);
/* extra put for the device_parent create_and_add */
kobject_put(dev->pkeys.device_parent[slave]);
fail_dev:
kobject_put(dev->iov_parent);
return err;
}
static int register_pkey_tree(struct mlx4_ib_dev *device)
{
int i;
if (!mlx4_is_master(device->dev))
return 0;
for (i = 0; i <= device->dev->num_vfs; ++i)
register_one_pkey_tree(device, i);
return 0;
}
static void unregister_pkey_tree(struct mlx4_ib_dev *device)
{
int slave;
struct kobject *p, *t;
struct mlx4_port *port;
if (!mlx4_is_master(device->dev))
return;
for (slave = device->dev->num_vfs; slave >= 0; --slave) {
list_for_each_entry_safe(p, t,
&device->pkeys.pkey_port_list[slave],
entry) {
list_del(&p->entry);
port = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
kobject_put(p);
kobject_put(device->dev_ports_parent[slave]);
}
kobject_put(device->dev_ports_parent[slave]);
kobject_put(device->pkeys.device_parent[slave]);
kobject_put(device->pkeys.device_parent[slave]);
kobject_put(device->iov_parent);
}
}
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
{
int i;
int ret = 0;
if (!mlx4_is_master(dev->dev))
return 0;
dev->iov_parent =
kobject_create_and_add("iov",
kobject_get(dev->ib_dev.ports_parent->parent));
if (!dev->iov_parent) {
ret = -ENOMEM;
goto err;
}
dev->ports_parent =
kobject_create_and_add("ports",
kobject_get(dev->iov_parent));
if (!dev->iov_parent) {
ret = -ENOMEM;
goto err_ports;
}
for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
ret = add_port_entries(dev, i);
if (ret)
goto err_add_entries;
}
ret = register_pkey_tree(dev);
if (ret)
goto err_add_entries;
return 0;
err_add_entries:
kobject_put(dev->ports_parent);
err_ports:
kobject_put(dev->iov_parent);
err:
kobject_put(dev->ib_dev.ports_parent->parent);
pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
return ret;
}
static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
{
struct mlx4_ib_iov_port *p;
int i;
if (!mlx4_is_master(device->dev))
return;
for (i = 0; i < device->dev->caps.num_ports; i++) {
p = &device->iov_ports[i];
kobject_put(p->admin_alias_parent);
kobject_put(p->gids_parent);
kobject_put(p->pkeys_parent);
kobject_put(p->mcgs_parent);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->dev->ports_parent);
kfree(p->dentr_ar);
}
}
void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
{
unregister_alias_guid_tree(device);
unregister_pkey_tree(device);
kobject_put(device->ports_parent);
kobject_put(device->iov_parent);
kobject_put(device->iov_parent);
kobject_put(device->ib_dev.ports_parent->parent);
}

View File

@ -40,7 +40,9 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MLX4_IB_UVERBS_ABI_VERSION 3
#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
#define MLX4_IB_UVERBS_ABI_VERSION 4
/*
* Make sure that all structs defined in this file remain laid out so
@ -50,12 +52,20 @@
* instead.
*/
struct mlx4_ib_alloc_ucontext_resp {
struct mlx4_ib_alloc_ucontext_resp_v3 {
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
};
struct mlx4_ib_alloc_ucontext_resp {
__u32 dev_caps;
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
__u32 cqe_size;
};
struct mlx4_ib_alloc_pd_resp {
__u32 pdn;
__u32 reserved;

View File

@ -71,4 +71,3 @@ int mlx4_wc_enabled(void)
}
#endif

View File

@ -1808,7 +1808,7 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn,
case IB_QPT_RAW_IPV6:
op_mod = 2;
break;
case IB_QPT_RAW_ETY:
case IB_QPT_RAW_ETHERTYPE:
op_mod = 3;
break;
default:

View File

@ -1325,7 +1325,7 @@ static void __init mthca_validate_profile(void)
if (log_mtts_per_seg == 0)
log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 5)) {
printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %ld\n",
printk(KERN_WARNING PFX "bad log_mtts_per_seg (%d). Using default - %d\n",
log_mtts_per_seg, ilog2(MTHCA_MTT_SEG_SIZE / 8));
log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
}

View File

@ -448,6 +448,7 @@ static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int pag
page * MTHCA_ICM_PAGE_SIZE;
}
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>

View File

@ -1006,7 +1006,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
}
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
u64 virt, int acc, struct ib_udata *udata, int mr_id)
{
struct mthca_dev *dev = to_mdev(pd->device);
struct ib_umem_chunk *chunk;
@ -1402,7 +1402,7 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
ret = ib_register_device(&dev->ib_dev);
ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;

View File

@ -109,7 +109,8 @@ enum {
IPOIB_ENCAP_LEN = 4,
IPOIB_HEADER_LEN = IPOIB_ENCAP_LEN + INFINIBAND_ALEN,
IPOIB_UD_MAX_MTU = 4 * 1024,
IPOIB_UD_RX_SG = (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
// IPOIB_UD_RX_SG = (IPOIB_UD_MAX_MTU / MJUMPAGESIZE),
IPOIB_UD_RX_SG = 2,
IPOIB_UD_TX_SG = (IPOIB_UD_MAX_MTU / MCLBYTES) + 2,
IPOIB_CM_MAX_MTU = (64 * 1024),
IPOIB_CM_TX_SG = (IPOIB_CM_MAX_MTU / MCLBYTES) + 2,

View File

@ -1539,3 +1539,20 @@ ipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
module_init(ipoib_init_module);
module_exit(ipoib_cleanup_module);
#undef MODULE_VERSION
#include <sys/module.h>
static int
ipoib_evhand(module_t mod, int event, void *arg)
{
return (0);
}
static moduledata_t ipoib_mod = {
.name = "ipoib",
.evhand = ipoib_evhand,
};
DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);

View File

@ -1,9 +1,34 @@
obj-$(CONFIG_MLX4_CORE) += mlx4_core.o
# $FreeBSD$
#.PATH: ${.CURDIR}/../../ofed/drivers/net/mlx4:${.CURDIR}/../../ofed/include/linux
.PATH: ${.CURDIR}/../../../../../include/linux
mlx4_core-y := alloc.o catas.o cmd.o cq.o eq.o fw.o icm.o intf.o main.o mcg.o \
mr.o pd.o port.o profile.o qp.o reset.o sense.o srq.o xrcd.o
.include <bsd.own.mk>
obj-$(CONFIG_MLX4_EN) += mlx4_en.o
mlx4_en-y := en_main.o en_tx.o en_rx.o en_ethtool.o en_port.o en_cq.o \
en_resources.o en_netdev.o en_frag.o en_selftest.o
KMOD = mlx4
SRCS = device_if.h bus_if.h pci_if.h vnode_if.h
SRCS+= alloc.c catas.c cmd.c cq.c eq.c fw.c icm.c intf.c main.c mcg.c mr.c linux_compat.c linux_radix.c
SRCS+= pd.c port.c profile.c qp.c reset.c sense.c srq.c resource_tracker.c sys_tune.c
SRCS+= opt_inet.h opt_inet6.h
#CFLAGS+= -I${.CURDIR}/../../ofed/drivers/net/mlx4
#CFLAGS+= -I${.CURDIR}/../../ofed/include/
CFLAGS+= -I${.CURDIR}/../../../../../include
.if !defined(KERNBUILDDIR)
.if ${MK_INET_SUPPORT} != "no"
opt_inet.h:
@echo "#define INET 1" > ${.TARGET}
.endif
.if ${MK_INET6_SUPPORT} != "no"
opt_inet6.h:
@echo "#define INET6 1" > ${.TARGET}
.endif
.endif
.include <bsd.kmod.mk>
CFLAGS+= -Wno-cast-qual -Wno-pointer-arith -fms-extensions

View File

@ -34,6 +34,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mm.h>
//#include <linux/export.h> /* XXX SK probabaly not needed in freeBSD XXX */
#include <linux/bitmap.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
@ -77,14 +78,15 @@ void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj)
static unsigned long find_aligned_range(unsigned long *bitmap,
u32 start, u32 nbits,
int len, int align)
int len, int align, u32 skip_mask)
{
unsigned long end, i;
again:
start = ALIGN(start, align);
while ((start < nbits) && test_bit(start, bitmap))
while ((start < nbits) && (test_bit(start, bitmap) ||
(start & skip_mask)))
start += align;
if (start >= nbits)
@ -95,7 +97,7 @@ static unsigned long find_aligned_range(unsigned long *bitmap,
return -1;
for (i = start + 1; i < end; i++) {
if (test_bit(i, bitmap)) {
if (test_bit(i, bitmap) || ((u32)i & skip_mask)) {
start = i + 1;
goto again;
}
@ -104,27 +106,27 @@ static unsigned long find_aligned_range(unsigned long *bitmap,
return start;
}
u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align)
u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt,
int align, u32 skip_mask)
{
u32 obj, i;
u32 obj;
if (likely(cnt == 1 && align == 1))
if (likely(cnt == 1 && align == 1 && !skip_mask))
return mlx4_bitmap_alloc(bitmap);
spin_lock(&bitmap->lock);
obj = find_aligned_range(bitmap->table, bitmap->last,
bitmap->max, cnt, align);
bitmap->max, cnt, align, skip_mask);
if (obj >= bitmap->max) {
bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
& bitmap->mask;
obj = find_aligned_range(bitmap->table, 0, bitmap->max,
cnt, align);
cnt, align, skip_mask);
}
if (obj < bitmap->max) {
for (i = 0; i < cnt; i++)
set_bit(obj + i, bitmap->table);
bitmap_set(bitmap->table, obj, cnt);
if (obj == bitmap->last) {
bitmap->last = (obj + cnt);
if (bitmap->last >= bitmap->max)
@ -149,16 +151,10 @@ u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap)
void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
{
u32 i;
obj &= bitmap->max + bitmap->reserved_top - 1;
spin_lock(&bitmap->lock);
for (i = 0; i < cnt; i++)
clear_bit(obj + i, bitmap->table);
bitmap->last = min(bitmap->last, obj);
bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
& bitmap->mask;
bitmap_clear(bitmap->table, obj, cnt);
bitmap->avail += cnt;
spin_unlock(&bitmap->lock);
}
@ -166,12 +162,17 @@ void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt)
int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 reserved_top)
{
int i;
/* sanity check */
if (num <= (u64)reserved_top + reserved_bot)
return -EINVAL;
/* num must be a power of 2 */
if (num != roundup_pow_of_two(num))
return -EINVAL;
if (reserved_bot + reserved_top >= num)
return -EINVAL;
bitmap->last = 0;
bitmap->top = 0;
bitmap->max = num - reserved_top;
@ -184,8 +185,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask,
if (!bitmap->table)
return -ENOMEM;
for (i = 0; i < reserved_bot; ++i)
set_bit(i, bitmap->table);
bitmap_set(bitmap->table, 0, reserved_bot);
return 0;
}
@ -207,7 +207,6 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
{
dma_addr_t t;
buf->direct.buf = NULL;
if (size <= max_direct) {
buf->nbufs = 1;
buf->npages = 1;
@ -229,11 +228,10 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
int i;
buf->direct.buf = NULL;
buf->direct.map = 0;
buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
buf->npages = buf->nbufs;
buf->page_shift = PAGE_SHIFT;
buf->page_list = kzalloc(buf->nbufs * sizeof *buf->page_list,
buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
GFP_KERNEL);
if (!buf->page_list)
return -ENOMEM;
@ -291,7 +289,6 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
buf->page_list[i].map);
kfree(buf->page_list);
}
buf->direct.buf = NULL;
}
EXPORT_SYMBOL_GPL(mlx4_buf_free);

View File

@ -32,10 +32,12 @@
*/
#include <linux/workqueue.h>
#include <linux/module.h>
#include "mlx4.h"
#define MLX4_CATAS_POLL_INTERVAL (5 * HZ)
#define MLX4_CATAS_POLL_INTERVAL (5 * HZ)
static DEFINE_SPINLOCK(catas_lock);
@ -45,7 +47,8 @@ static struct work_struct catas_work;
static int internal_err_reset = 1;
module_param(internal_err_reset, int, 0644);
MODULE_PARM_DESC(internal_err_reset,
"Reset device on internal errors if non-zero (default 1)");
"Reset device on internal errors if non-zero"
" (default 1, in SRIOV mode default is 0)");
static void dump_err_buf(struct mlx4_dev *dev)
{
@ -65,16 +68,21 @@ static void poll_catas(unsigned long dev_ptr)
struct mlx4_priv *priv = mlx4_priv(dev);
if (readl(priv->catas_err.map)) {
dump_err_buf(dev);
/* If the device is off-line, we cannot try to recover it */
if (pci_channel_offline(dev->pdev))
mod_timer(&priv->catas_err.timer,
round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
else {
dump_err_buf(dev);
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
if (internal_err_reset) {
spin_lock(&catas_lock);
list_add(&priv->catas_err.list, &catas_list);
spin_unlock(&catas_lock);
if (internal_err_reset) {
spin_lock(&catas_lock);
list_add(&priv->catas_err.list, &catas_list);
spin_unlock(&catas_lock);
queue_work(mlx4_wq, &catas_work);
queue_work(mlx4_wq, &catas_work);
}
}
} else
mod_timer(&priv->catas_err.timer,
@ -89,9 +97,6 @@ static void catas_reset(struct work_struct *work)
LIST_HEAD(tlist);
int ret;
if (!mutex_trylock(&drv_mutex))
return;
spin_lock_irq(&catas_lock);
list_splice_init(&catas_list, &tlist);
spin_unlock_irq(&catas_lock);
@ -99,23 +104,30 @@ static void catas_reset(struct work_struct *work)
list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
struct pci_dev *pdev = priv->dev.pdev;
/* If the device is off-line, we cannot reset it */
if (pci_channel_offline(pdev))
continue;
ret = mlx4_restart_one(priv->dev.pdev);
/* 'priv' now is not valid */
if (ret)
printk(KERN_ERR "mlx4 %s: Reset failed (%d)\n",
pci_name(pdev), ret);
pr_err("mlx4 %s: Reset failed (%d)\n",
pci_name(pdev), ret);
else {
dev = pci_get_drvdata(pdev);
mlx4_dbg(dev, "Reset succeeded\n");
}
}
mutex_unlock(&drv_mutex);
}
void mlx4_start_catas_poll(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
unsigned long addr;
phys_addr_t addr;
/*If we are in SRIOV the default of the module param must be 0*/
if (mlx4_is_mfunc(dev))
internal_err_reset = 0;
INIT_LIST_HEAD(&priv->catas_err.list);
init_timer(&priv->catas_err.timer);
@ -126,8 +138,8 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map) {
mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
addr);
mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
(unsigned long long) addr);
return;
}

File diff suppressed because it is too large Load Diff

View File

@ -43,27 +43,6 @@
#include "mlx4.h"
#include "icm.h"
struct mlx4_cq_context {
__be32 flags;
u16 reserved1[3];
__be16 page_offset;
__be32 logsize_usrpage;
__be16 cq_period;
__be16 cq_max_count;
u8 reserved2[3];
u8 comp_eqn;
u8 log_page_size;
u8 reserved3[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
__be32 last_notified_index;
__be32 solicit_producer_index;
__be32 consumer_index;
__be32 producer_index;
u32 reserved4[2];
__be64 db_rec_addr;
};
#define MLX4_CQ_STATUS_OK ( 0 << 28)
#define MLX4_CQ_STATUS_OVERFLOW ( 9 << 28)
#define MLX4_CQ_STATUS_WRITE_FAIL (10 << 28)
@ -75,10 +54,16 @@ struct mlx4_cq_context {
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
struct mlx4_cq *cq;
spin_lock(&cq_table->lock);
cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
cqn & (dev->caps.num_cqs - 1));
if (cq)
atomic_inc(&cq->refcount);
spin_unlock(&cq_table->lock);
if (!cq) {
mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
return;
@ -87,6 +72,9 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
++cq->arm_sn;
cq->comp(cq);
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
}
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
@ -116,23 +104,24 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ,
MLX4_CMD_TIME_CLASS_A);
return mlx4_cmd(dev, mailbox->dma, cq_num, 0,
MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
}
static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num, u32 opmod)
{
return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
MLX4_CMD_TIME_CLASS_A);
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int cq_num)
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
MLX4_CMD_TIME_CLASS_A);
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0,
cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
@ -187,25 +176,121 @@ int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
}
EXPORT_SYMBOL_GPL(mlx4_cq_resize);
static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv)
int mlx4_cq_ignore_overrun(struct mlx4_dev *dev, struct mlx4_cq *cq)
{
int i;
int index = 0;
int min = priv->eq_table.eq[0].load;
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_cq_context *cq_context;
int err;
for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) {
if (priv->eq_table.eq[i].load < min) {
index = i;
min = priv->eq_table.eq[i].load;
}
}
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
return index;
cq_context = mailbox->buf;
memset(cq_context, 0, sizeof *cq_context);
cq_context->flags |= cpu_to_be32(MLX4_CQ_FLAG_OI);
err = mlx4_MODIFY_CQ(dev, mailbox, cq->cqn, 3);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_cq_ignore_overrun);
int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cq_table *cq_table = &priv->cq_table;
int err;
*cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
if (*cqn == -1)
return -ENOMEM;
err = mlx4_table_get(dev, &cq_table->table, *cqn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
if (err)
goto err_put;
return 0;
err_put:
mlx4_table_put(dev, &cq_table->table, *cqn);
err_out:
mlx4_bitmap_free(&cq_table->bitmap, *cqn);
return err;
}
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
unsigned vector, int collapsed)
static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
{
u64 out_param;
int err;
if (mlx4_is_mfunc(dev)) {
err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ,
RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
return err;
else {
*cqn = get_param_l(&out_param);
return 0;
}
}
return __mlx4_cq_alloc_icm(dev, cqn);
}
void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cq_table *cq_table = &priv->cq_table;
mlx4_table_put(dev, &cq_table->cmpt_table, cqn);
mlx4_table_put(dev, &cq_table->table, cqn);
mlx4_bitmap_free(&cq_table->bitmap, cqn);
}
static void mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn)
{
u64 in_param = 0;
int err;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, cqn);
err = mlx4_cmd(dev, in_param, RES_CQ, RES_OP_RESERVE_AND_MAP,
MLX4_CMD_FREE_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
mlx4_warn(dev, "Failed freeing cq:%d\n", cqn);
} else
__mlx4_cq_free_icm(dev, cqn);
}
static int mlx4_find_least_loaded_vector(struct mlx4_priv *priv)
{
int i;
int index = 0;
int min = priv->eq_table.eq[0].load;
for (i = 1; i < priv->dev.caps.num_comp_vectors; i++) {
if (priv->eq_table.eq[i].load < min) {
index = i;
min = priv->eq_table.eq[i].load;
}
}
return index;
}
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
struct mlx4_mtt *mtt, struct mlx4_uar *uar, u64 db_rec,
struct mlx4_cq *cq, unsigned vector, int collapsed,
int timestamp_en)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cq_table *cq_table = &priv->cq_table;
@ -214,29 +299,24 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
u64 mtt_addr;
int err;
cq->vector = (vector == MLX4_LEAST_ATTACHED_VECTOR) ?
mlx4_find_least_loaded_vector(priv) : vector;
cq->vector = (vector == MLX4_LEAST_ATTACHED_VECTOR) ?
mlx4_find_least_loaded_vector(priv) : vector;
if (cq->vector >= dev->caps.num_comp_vectors)
if (cq->vector > dev->caps.num_comp_vectors + dev->caps.comp_pool) {
return -EINVAL;
}
cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
if (cq->cqn == -1)
return -ENOMEM;
err = mlx4_table_get(dev, &cq_table->table, cq->cqn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &cq_table->cmpt_table, cq->cqn);
if (err)
goto err_put;
err = mlx4_cq_alloc_icm(dev, &cq->cqn);
if (err) {
return err;
}
spin_lock_irq(&cq_table->lock);
err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
spin_unlock_irq(&cq_table->lock);
if (err)
goto err_cmpt_put;
if (err){
goto err_icm;
}
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
@ -248,6 +328,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
memset(cq_context, 0, sizeof *cq_context);
cq_context->flags = cpu_to_be32(!!collapsed << 18);
if (timestamp_en)
cq_context->flags |= cpu_to_be32(1 << 19);
cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
cq_context->comp_eqn = priv->eq_table.eq[cq->vector].eqn;
cq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
@ -262,13 +345,16 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
if (err)
goto err_radix;
priv->eq_table.eq[cq->vector].load++;
priv->eq_table.eq[cq->vector].load++;
cq->cons_index = 0;
cq->arm_sn = 1;
cq->uar = uar;
atomic_set(&cq->refcount, 1);
init_completion(&cq->free);
cq->eqn = priv->eq_table.eq[cq->vector].eqn;
cq->irq = priv->eq_table.eq[cq->vector].irq;
return 0;
err_radix:
@ -276,14 +362,8 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
radix_tree_delete(&cq_table->tree, cq->cqn);
spin_unlock_irq(&cq_table->lock);
err_cmpt_put:
mlx4_table_put(dev, &cq_table->cmpt_table, cq->cqn);
err_put:
mlx4_table_put(dev, &cq_table->table, cq->cqn);
err_out:
mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
err_icm:
mlx4_cq_free_icm(dev, cq->cqn);
return err;
}
@ -299,8 +379,9 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
if (err)
mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
priv->eq_table.eq[cq->vector].load--;
synchronize_irq(priv->eq_table.eq[cq->vector].irq);
priv->eq_table.eq[cq->vector].load--;
spin_lock_irq(&cq_table->lock);
radix_tree_delete(&cq_table->tree, cq->cqn);
@ -310,8 +391,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
complete(&cq->free);
wait_for_completion(&cq->free);
mlx4_table_put(dev, &cq_table->table, cq->cqn);
mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
mlx4_cq_free_icm(dev, cq->cqn);
}
EXPORT_SYMBOL_GPL(mlx4_cq_free);
@ -322,6 +402,8 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
spin_lock_init(&cq_table->lock);
INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
if (mlx4_is_slave(dev))
return 0;
err = mlx4_bitmap_init(&cq_table->bitmap, dev->caps.num_cqs,
dev->caps.num_cqs - 1, dev->caps.reserved_cqs, 0);
@ -333,6 +415,8 @@ int mlx4_init_cq_table(struct mlx4_dev *dev)
void mlx4_cleanup_cq_table(struct mlx4_dev *dev)
{
if (mlx4_is_slave(dev))
return;
/* Nothing to do to clean up radix_tree */
mlx4_bitmap_cleanup(&mlx4_priv(dev)->cq_table.bitmap);
}

View File

@ -101,10 +101,12 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
if (!cq->is_tx)
cq->size = priv->rx_ring[cq->ring].actual_size;
err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
if (err)
cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx, 0);
if (err) {
return err;
}
cq->mcq.comp = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
cq->mcq.event = mlx4_en_cq_event;

View File

@ -88,7 +88,8 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->tcp_rss = tcp_rss;
params->udp_rss = udp_rss;
if (params->udp_rss && !mdev->dev->caps.udp_rss) {
if (params->udp_rss && !(mdev->dev->caps.flags
& MLX4_DEV_CAP_FLAG_UDP_RSS)) {
mlx4_warn(mdev, "UDP RSS is not supported on this device.\n");
params->udp_rss = 0;
}
@ -116,18 +117,17 @@ static void *get_netdev(struct mlx4_dev *dev, void *ctx, u8 port)
}
static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
enum mlx4_dev_event event, int port)
enum mlx4_dev_event event, unsigned long port)
{
struct mlx4_en_dev *mdev = (struct mlx4_en_dev *) endev_ptr;
struct mlx4_en_priv *priv;
if (!mdev->pndev[port])
return;
priv = netdev_priv(mdev->pndev[port]);
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
case MLX4_DEV_EVENT_PORT_DOWN:
if (!mdev->pndev[port])
return;
priv = netdev_priv(mdev->pndev[port]);
/* To prevent races, we poll the link state in a separate
task rather than changing it here */
priv->link_state = event;
@ -139,7 +139,11 @@ static void mlx4_en_event(struct mlx4_dev *dev, void *endev_ptr,
break;
default:
mlx4_warn(mdev, "Unhandled event: %d\n", event);
if (port < 1 || port > dev->caps.num_ports ||
!mdev->pndev[port])
return;
mlx4_warn(mdev, "Unhandled event %d for port %d\n", event,
(int) port);
}
}
@ -351,8 +355,8 @@ static struct mlx4_interface mlx4_en_interface = {
.remove = mlx4_en_remove,
.event = mlx4_en_event,
.query = mlx4_en_query,
.get_prot_dev = get_netdev,
.protocol = MLX4_PROT_EN,
.get_dev = get_netdev,
.protocol = MLX4_PROT_ETH,
};
static int __init mlx4_en_init(void)

View File

@ -632,8 +632,7 @@ int mlx4_en_start_port(struct net_device *dev)
/* Set port mac number */
en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
err = mlx4_register_mac(mdev->dev, priv->port,
mlx4_en_mac_to_u64(IF_LLADDR(dev)),
&priv->mac_index);
mlx4_en_mac_to_u64(IF_LLADDR(dev)));
if (err) {
en_err(priv, "Failed setting port mac\n");
goto tx_err;
@ -697,7 +696,7 @@ int mlx4_en_start_port(struct net_device *dev)
mlx4_CLOSE_PORT(mdev->dev, priv->port);
mac_err:
mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
mlx4_unregister_mac(mdev->dev, priv->port, priv->mac);
tx_err:
while (tx_index--) {
mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]);
@ -730,7 +729,7 @@ void mlx4_en_stop_port(struct net_device *dev)
priv->port_up = false;
/* Unregister Mac address for the port */
mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index);
mlx4_unregister_mac(mdev->dev, priv->port, priv->mac);
mdev->mac_removed[priv->port] = 1;
/* Free TX Rings */
@ -946,6 +945,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
mutex_unlock(&mdev->state_lock);
mlx4_en_free_resources(priv);
mtx_destroy(&priv->stats_lock.m);
mtx_destroy(&priv->vlan_lock.m);
kfree(priv);
@ -1587,6 +1587,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
/*
* Setup wake-on-lan.
*/
#if 0
if (priv->mdev->dev->caps.wol) {
u64 config;
if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) {
@ -1596,6 +1597,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
dev->if_capenable |= IFCAP_WOL_MAGIC;
}
}
#endif
/* Register for VLAN events */
priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,

View File

@ -39,13 +39,14 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/cmd.h>
#if 0 // moved to port.c
int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
u64 mac, u64 clear, u8 mode)
{
return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
#endif
int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans)
{
@ -65,12 +66,13 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans)
i++, j--)
filter->entry[j] = cpu_to_be32(vlans[i]);
err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR,
MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
#if 0 //moved to port.c - shahark
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
{
@ -94,15 +96,19 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
u8 promisc)
{
printf("%s %s:%d\n", __func__, __FILE__, __LINE__);
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_set_port_rqp_calc_context *context;
int err;
@ -116,8 +122,10 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
context->base_qpn = cpu_to_be32(base_qpn);
context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_EN_SHIFT | base_qpn);
/*
context->mcast = cpu_to_be32((dev->caps.mc_promisc_mode <<
SET_PORT_PROMISC_MODE_SHIFT) | base_qpn);
*/
context->intra_no_vlan = 0;
context->no_vlan = MLX4_NO_VLAN_IDX;
context->intra_vlan_miss = 0;
@ -125,11 +133,12 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
#endif
int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
{
@ -144,7 +153,7 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, sizeof(*qport_context));
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
if (err)
goto out;
qport_context = mailbox->buf;
@ -176,6 +185,7 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
return err;
}
#if 0
static int read_iboe_counters(struct mlx4_dev *dev, int index, u64 counters[])
{
struct mlx4_cmd_mailbox *mailbox;
@ -189,7 +199,7 @@ static int read_iboe_counters(struct mlx4_dev *dev, int index, u64 counters[])
return -ENOMEM;
err = mlx4_cmd_box(dev, 0, mailbox->dma, index, 0,
MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_WRAPPED);
if (err)
goto out;
@ -217,6 +227,7 @@ static int read_iboe_counters(struct mlx4_dev *dev, int index, u64 counters[])
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
#endif
int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
{
@ -229,22 +240,24 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
unsigned long ierror;
int err;
int i;
int counter;
//int counter;
u64 counters[4];
dev = mdev->pndev[port];
priv = netdev_priv(dev);
memset(counters, 0, sizeof counters);
/*
counter = mlx4_get_iboe_counter(priv->mdev->dev, port);
if (counter >= 0)
err = read_iboe_counters(priv->mdev->dev, counter, counters);
*/
mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
memset(mailbox->buf, 0, sizeof(*mlx4_en_stats));
err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
if (err)
goto out;

View File

@ -39,11 +39,7 @@
#define SET_PORT_PROMISC_EN_SHIFT 31
#define SET_PORT_PROMISC_MODE_SHIFT 30
enum {
MLX4_CMD_SET_VLAN_FLTR = 0x47,
MLX4_CMD_SET_MCAST_FLTR = 0x48,
MLX4_CMD_DUMP_ETH_STATS = 0x49,
};
#if 0 //moved to port.c - shahark
struct mlx4_set_port_general_context {
u8 reserved[3];
@ -72,6 +68,7 @@ struct mlx4_set_port_rqp_calc_context {
__be32 promisc;
__be32 mcast;
};
#endif
#define VLAN_FLTR_SIZE 128
struct mlx4_set_vlan_fltr_mbox {

View File

@ -267,7 +267,6 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
int err;
int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
DS_SIZE * priv->num_frags);
for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
ring = &priv->rx_ring[ring_ind];
@ -673,7 +672,6 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
en_err(priv, "Failed to allocate qp context\n");
return -ENOMEM;
}
err = mlx4_qp_alloc(mdev->dev, qpn, qp);
if (err) {
en_err(priv, "Failed to allocate qp #%x\n", qpn);
@ -717,7 +715,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
en_dbg(DRV, priv, "Configuring rss steering\n");
err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
roundup_pow_of_two(priv->rx_ring_num),
&rss_map->base_qpn);
&rss_map->base_qpn, 0);
if (err) {
en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
return err;
@ -736,7 +734,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
}
/* Configure RSS indirection qp */
err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn, 0);
if (err) {
en_err(priv, "Failed to reserve range for RSS "
"indirection qp\n");

View File

@ -122,7 +122,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
"buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
err = mlx4_qp_reserve_range(mdev->dev, 1, 256, &ring->qpn);
err = mlx4_qp_reserve_range(mdev->dev, 1, 256, &ring->qpn, MLX4_RESERVE_BF_QP);
if (err) {
en_err(priv, "Failed reserving qp for tx ring.\n");
goto err_map;
@ -135,7 +135,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
}
ring->qp.event = mlx4_en_sqp_event;
err = mlx4_bf_alloc(mdev->dev, &ring->bf);
err = mlx4_bf_alloc(mdev->dev, &ring->bf, 0);
if (err) {
ring->bf.uar = &mdev->priv_uar;
ring->bf.uar->map = mdev->uar_map;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -78,10 +78,10 @@ struct mlx4_dev_cap {
u16 wavelength[MLX4_MAX_PORTS + 1];
u64 trans_code[MLX4_MAX_PORTS + 1];
u16 stat_rate_support;
int udp_rss;
int loopback_support;
int wol;
int fs_log_max_ucast_qp_range_size;
int fs_max_num_qp_per_entry;
u64 flags;
u64 flags2;
int reserved_uars;
int uar_size;
int min_page_sz;
@ -108,17 +108,41 @@ struct mlx4_dev_cap {
int dmpt_entry_sz;
int cmpt_entry_sz;
int mtt_entry_sz;
int inline_cfg;
int resize_srq;
u32 bmme_flags;
u32 reserved_lkey;
u64 max_icm_sz;
int max_gso_sz;
int max_rss_tbl_sz;
u8 supported_port_types[MLX4_MAX_PORTS + 1];
u8 suggested_type[MLX4_MAX_PORTS + 1];
u8 default_sense[MLX4_MAX_PORTS + 1];
u8 log_max_macs[MLX4_MAX_PORTS + 1];
u8 log_max_vlans[MLX4_MAX_PORTS + 1];
u32 max_basic_counters;
u32 max_ext_counters;
u32 sync_qp;
u8 timestamp_support;
u32 max_extended_counters;
};
struct mlx4_func_cap {
u8 num_ports;
u8 flags;
u32 pf_context_behaviour;
int qp_quota;
int cq_quota;
int srq_quota;
int mpt_quota;
int mtt_quota;
int max_eq;
int reserved_eq;
int mcg_quota;
u32 qp0_tunnel_qpn;
u32 qp0_proxy_qpn;
u32 qp1_tunnel_qpn;
u32 qp1_proxy_qpn;
u8 physical_port;
u8 port_flags;
};
struct mlx4_adapter {
@ -138,8 +162,10 @@ struct mlx4_init_hca_param {
u64 dmpt_base;
u64 cmpt_base;
u64 mtt_base;
u64 global_caps;
u16 log_mc_entry_sz;
u16 log_mc_hash_sz;
u16 hca_core_clock;
u8 log_num_qps;
u8 log_num_srqs;
u8 log_num_cqs;
@ -148,6 +174,9 @@ struct mlx4_init_hca_param {
u8 log_mc_table_sz;
u8 log_mpt_sz;
u8 log_uar_sz;
u8 uar_page_sz; /* log pg sz in 4k chunks */
u8 steering_mode; /* for QUERY_HCA */
u64 dev_cap_enabled;
};
struct mlx4_init_ib_param {
@ -172,16 +201,27 @@ struct mlx4_set_ib_param {
};
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
struct mlx4_func_cap *func_cap);
int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_FA(struct mlx4_dev *dev);
int mlx4_RUN_FW(struct mlx4_dev *dev);
int mlx4_QUERY_FW(struct mlx4_dev *dev);
int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
int mlx4_NOP(struct mlx4_dev *dev);
int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg);
void mlx4_opreq_action(struct work_struct *work);
#endif /* MLX4_FW_H */

View File

@ -31,10 +31,10 @@
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/mlx4/cmd.h>
@ -93,13 +93,17 @@ void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent)
kfree(icm);
}
static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
static int mlx4_alloc_icm_pages(struct scatterlist *mem, int order,
gfp_t gfp_mask, int node)
{
struct page *page;
page = alloc_pages(gfp_mask, order);
if (!page)
return -ENOMEM;
page = alloc_pages_node(node, gfp_mask, order);
if (!page) {
page = alloc_pages(gfp_mask, order);
if (!page)
return -ENOMEM;
}
sg_set_page(mem, page, PAGE_SIZE << order, 0);
return 0;
@ -130,9 +134,13 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
/* We use sg_set_buf for coherent allocs, which assumes low memory */
BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!icm)
return NULL;
icm = kmalloc_node(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
dev->numa_node);
if (!icm) {
icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!icm)
return NULL;
}
icm->refcount = 0;
INIT_LIST_HEAD(&icm->chunk_list);
@ -141,10 +149,15 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
while (npages > 0) {
if (!chunk) {
chunk = kmalloc(sizeof *chunk,
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!chunk)
goto fail;
chunk = kmalloc_node(sizeof *chunk,
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN),
dev->numa_node);
if (!chunk) {
chunk = kmalloc(sizeof *chunk,
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!chunk)
goto fail;
}
sg_init_table(chunk->mem, MLX4_ICM_CHUNK_LEN);
chunk->npages = 0;
@ -161,31 +174,33 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
cur_order, gfp_mask);
else
ret = mlx4_alloc_icm_pages(&chunk->mem[chunk->npages],
cur_order, gfp_mask);
cur_order, gfp_mask,
dev->numa_node);
if (!ret) {
++chunk->npages;
if (ret) {
if (--cur_order < 0)
goto fail;
else
continue;
}
if (coherent)
++chunk->nsg;
else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
++chunk->npages;
if (chunk->nsg <= 0)
goto fail;
}
if (coherent)
++chunk->nsg;
else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
if (chunk->npages == MLX4_ICM_CHUNK_LEN)
chunk = NULL;
npages -= 1 << cur_order;
} else {
--cur_order;
if (cur_order < 0)
if (chunk->nsg <= 0)
goto fail;
}
if (chunk->npages == MLX4_ICM_CHUNK_LEN)
chunk = NULL;
npages -= 1 << cur_order;
}
if (!coherent && chunk) {
@ -209,36 +224,10 @@ static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
return mlx4_map_cmd(dev, MLX4_CMD_MAP_ICM, icm, virt);
}
int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
{
return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
MLX4_CMD_TIME_CLASS_B);
}
int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt)
{
struct mlx4_cmd_mailbox *mailbox;
__be64 *inbox;
int err;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
inbox = mailbox->buf;
inbox[0] = cpu_to_be64(virt);
inbox[1] = cpu_to_be64(dma_addr);
err = mlx4_cmd(dev, mailbox->dma, 1, 0, MLX4_CMD_MAP_ICM,
MLX4_CMD_TIME_CLASS_B);
mlx4_free_cmd_mailbox(dev, mailbox);
if (!err)
mlx4_dbg(dev, "Mapped page at %llx to %llx for ICM.\n",
(unsigned long long) dma_addr, (unsigned long long) virt);
return err;
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
@ -248,12 +237,14 @@ int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B);
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
int i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
u32 i = (obj & (table->num_obj - 1)) /
(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
int ret = 0;
mutex_lock(&table->mutex);
@ -286,16 +277,18 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
return ret;
}
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
{
int i;
u32 i;
u64 offset;
i = (obj & (table->num_obj - 1)) / (MLX4_TABLE_CHUNK_SIZE / table->obj_size);
mutex_lock(&table->mutex);
if (--table->icm[i]->refcount == 0) {
mlx4_UNMAP_ICM(dev, table->virt + i * MLX4_TABLE_CHUNK_SIZE,
offset = (u64) i * MLX4_TABLE_CHUNK_SIZE;
mlx4_UNMAP_ICM(dev, table->virt + offset,
MLX4_TABLE_CHUNK_SIZE / MLX4_ICM_PAGE_SIZE);
mlx4_free_icm(dev, table->icm[i], table->coherent);
table->icm[i] = NULL;
@ -304,9 +297,11 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
mutex_unlock(&table->mutex);
}
void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj,
dma_addr_t *dma_handle)
{
int idx, offset, dma_offset, i;
int offset, dma_offset, i;
u64 idx;
struct mlx4_icm_chunk *chunk;
struct mlx4_icm *icm;
struct page *page = NULL;
@ -316,7 +311,7 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han
mutex_lock(&table->mutex);
idx = (obj & (table->num_obj - 1)) * table->obj_size;
idx = (u64) (obj & (table->num_obj - 1)) * table->obj_size;
icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
@ -350,10 +345,11 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_han
}
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end)
u32 start, u32 end)
{
int inc = MLX4_TABLE_CHUNK_SIZE / table->obj_size;
int i, err;
int err;
u32 i;
for (i = start; i <= end; i += inc) {
err = mlx4_table_get(dev, table, i);
@ -373,22 +369,23 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
}
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end)
u32 start, u32 end)
{
int i;
u32 i;
for (i = start; i <= end; i += MLX4_TABLE_CHUNK_SIZE / table->obj_size)
mlx4_table_put(dev, table, i);
}
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
u64 virt, int obj_size, int nobj, int reserved,
u64 virt, int obj_size, u32 nobj, int reserved,
int use_lowmem, int use_coherent)
{
int obj_per_chunk;
int num_icm;
unsigned chunk_size;
int i;
u64 size;
obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
@ -404,10 +401,12 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
table->coherent = use_coherent;
mutex_init(&table->mutex);
size = (u64) nobj * obj_size;
for (i = 0; i * MLX4_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
chunk_size = MLX4_TABLE_CHUNK_SIZE;
if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > nobj * obj_size)
chunk_size = PAGE_ALIGN(nobj * obj_size - i * MLX4_TABLE_CHUNK_SIZE);
if ((i + 1) * MLX4_TABLE_CHUNK_SIZE > size)
chunk_size = PAGE_ALIGN(size -
i * MLX4_TABLE_CHUNK_SIZE);
table->icm[i] = mlx4_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
@ -437,6 +436,8 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
mlx4_free_icm(dev, table->icm[i], use_coherent);
}
kfree(table->icm);
return -ENOMEM;
}

View File

@ -71,17 +71,17 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
gfp_t gfp_mask, int coherent);
void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
u32 start, u32 end);
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
u32 start, u32 end);
int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
u64 virt, int obj_size, int nobj, int reserved,
u64 virt, int obj_size, u32 nobj, int reserved,
int use_lowmem, int use_coherent);
void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end);
void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
int start, int end);
void *mlx4_table_find(struct mlx4_icm_table *table, u32 obj, dma_addr_t *dma_handle);
static inline void mlx4_icm_first(struct mlx4_icm *icm,
struct mlx4_icm_iter *iter)
@ -122,9 +122,5 @@ static inline unsigned long mlx4_icm_size(struct mlx4_icm_iter *iter)
return sg_dma_len(&iter->chunk->mem[iter->page_idx]);
}
int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count);
int mlx4_MAP_ICM_page(struct mlx4_dev *dev, u64 dma_addr, u64 virt);
int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm);
int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev);
#endif /* MLX4_ICM_H */

View File

@ -31,6 +31,8 @@
* SOFTWARE.
*/
#include <linux/slab.h>
#include "mlx4.h"
struct mlx4_device_context {
@ -112,37 +114,8 @@ void mlx4_unregister_interface(struct mlx4_interface *intf)
}
EXPORT_SYMBOL_GPL(mlx4_unregister_interface);
struct mlx4_dev *mlx4_query_interface(void *int_dev, int *port)
{
struct mlx4_priv *priv;
struct mlx4_device_context *dev_ctx;
enum mlx4_query_reply r;
unsigned long flags;
mutex_lock(&intf_mutex);
list_for_each_entry(priv, &dev_list, dev_list) {
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list) {
if (!dev_ctx->intf->query)
continue;
r = dev_ctx->intf->query(dev_ctx->context, int_dev);
if (r != MLX4_QUERY_NOT_MINE) {
*port = r;
spin_unlock_irqrestore(&priv->ctx_lock, flags);
mutex_unlock(&intf_mutex);
return &priv->dev;
}
}
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
mutex_unlock(&intf_mutex);
return NULL;
}
EXPORT_SYMBOL_GPL(mlx4_query_interface);
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int port)
void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type,
unsigned long param)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
@ -152,7 +125,7 @@ void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, int por
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
dev_ctx->intf->event(dev, dev_ctx->context, type, port);
dev_ctx->intf->event(dev, dev_ctx->context, type, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
@ -169,7 +142,8 @@ int mlx4_register_device(struct mlx4_dev *dev)
mlx4_add_device(intf, priv);
mutex_unlock(&intf_mutex);
mlx4_start_catas_poll(dev);
if (!mlx4_is_slave(dev))
mlx4_start_catas_poll(dev);
return 0;
}
@ -179,7 +153,8 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
mlx4_stop_catas_poll(dev);
if (!mlx4_is_slave(dev))
mlx4_stop_catas_poll(dev);
mutex_lock(&intf_mutex);
list_for_each_entry(intf, &intf_list, list)
@ -190,7 +165,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
mutex_unlock(&intf_mutex);
}
void *mlx4_find_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port)
void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
@ -200,13 +175,13 @@ void *mlx4_find_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int por
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_prot_dev) {
result = dev_ctx->intf->get_prot_dev(dev, dev_ctx->context, port);
if (dev_ctx->intf->protocol == proto && dev_ctx->intf->get_dev) {
result = dev_ctx->intf->get_dev(dev, dev_ctx->context, port);
break;
}
}
spin_unlock_irqrestore(&priv->ctx_lock, flags);
return result;
}
EXPORT_SYMBOL_GPL(mlx4_get_protocol_dev);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -568,6 +568,7 @@ enum mlx4_en_wol {
MLX4_EN_WOL_DO_MODIFY = (1ULL << 63),
};
int mlx4_en_transmit(struct net_device *dev, struct mbuf *mb);
void mlx4_en_qflush(struct net_device *dev);
@ -635,12 +636,12 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
void mlx4_en_rx_irq(struct mlx4_cq *mcq);
int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
//int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans);
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
u8 promisc);
//int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
// u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
//int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
// u8 promisc);
int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);

View File

@ -34,34 +34,15 @@
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/mlx4/cmd.h>
#include "mlx4.h"
#include "icm.h"
/*
* Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
*/
struct mlx4_mpt_entry {
__be32 flags;
__be32 qpn;
__be32 key;
__be32 pd_flags;
__be64 start;
__be64 length;
__be32 lkey;
__be32 win_cnt;
u8 reserved1;
u8 flags2;
u8 reserved2;
u8 mtt_rep;
__be64 mtt_seg;
__be32 mtt_sz;
__be32 entity_size;
__be32 first_byte_offset;
} __attribute__((packed));
#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28)
#define MLX4_MPT_FLAG_FREE (0x3UL << 28)
#define MLX4_MPT_FLAG_MIO (1 << 17)
@ -73,8 +54,6 @@ struct mlx4_mpt_entry {
#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
#define MLX4_MPT_FLAG2_FBO_EN (1 << 7)
#define MLX4_MPT_STATUS_SW 0xF0
#define MLX4_MPT_STATUS_HW 0x00
@ -141,19 +120,19 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *),
GFP_KERNEL);
buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
GFP_KERNEL);
if (!buddy->bits || !buddy->num_free)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
if (!buddy->bits[i])
goto err_out_free;
bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN);
if (!buddy->bits[i]) {
goto err_out_free;
}
}
set_bit(0, buddy->bits[buddy->max_order]);
@ -163,7 +142,8 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
if ( buddy->bits[i] )
kfree(buddy->bits[i]);
err_out:
kfree(buddy->bits);
@ -177,28 +157,54 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
int i;
for (i = 0; i <= buddy->max_order; ++i)
kfree(buddy->bits[i]);
kfree(buddy->bits[i]);
kfree(buddy->bits);
kfree(buddy->num_free);
}
static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
u32 seg;
int seg_order;
u32 offset;
seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
seg_order = max_t(int, order - log_mtts_per_seg, 0);
seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
if (seg == -1)
return -1;
if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
seg + (1 << order) - 1)) {
mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
offset = seg * (1 << log_mtts_per_seg);
if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
offset + (1 << order) - 1)) {
mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
return -1;
}
return seg;
return offset;
}
static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
u64 in_param = 0;
u64 out_param;
int err;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, order);
err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT,
RES_OP_RESERVE_AND_MAP,
MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
if (err)
return -1;
return get_param_l(&out_param);
}
return __mlx4_alloc_mtt_range(dev, order);
}
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
@ -213,33 +219,66 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
} else
mtt->page_shift = page_shift;
for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
for (mtt->order = 0, i = 1; i < npages; i <<= 1)
++mtt->order;
mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
if (mtt->first_seg == -1)
mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
if (mtt->offset == -1) {
mlx4_err(dev, "Failed to allocate mtts for %d pages(order %d)\n",
npages, mtt->order);
return -ENOMEM;
}
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_init);
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
{
u32 first_seg;
int seg_order;
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
seg_order = max_t(int, order - log_mtts_per_seg, 0);
first_seg = offset / (1 << log_mtts_per_seg);
mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
mlx4_table_put_range(dev, &mr_table->mtt_table, offset,
offset + (1 << order) - 1);
}
static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
{
u64 in_param = 0;
int err;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, offset);
set_param_h(&in_param, order);
err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
MLX4_CMD_FREE_RES,
MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
if (err)
mlx4_warn(dev, "Failed to free mtt range at:"
"%d order:%d\n", offset, order);
return;
}
__mlx4_free_mtt_range(dev, offset, order);
}
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
if (mtt->order < 0)
return;
mlx4_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
mlx4_table_put_range(dev, &mr_table->mtt_table, mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
}
EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
return (u64) mtt->offset * dev->caps.mtt_entry_sz;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
@ -256,40 +295,20 @@ static u32 key_to_hw_index(u32 key)
static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int mpt_index)
{
return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
MLX4_CMD_TIME_CLASS_B);
return mlx4_cmd(dev, mailbox->dma, mpt_index,
0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int mpt_index)
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
!mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
!mailbox, MLX4_CMD_HW2SW_MPT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u32 mridx;
mridx = mlx4_bitmap_alloc_range(&priv->mr_table.mpt_bitmap, cnt, align);
if (mridx == -1)
return -ENOMEM;
*base_mridx = mridx;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mr_reserve_range);
void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mlx4_bitmap_free_range(&priv->mr_table.mpt_bitmap, base_mridx, cnt);
}
EXPORT_SYMBOL_GPL(mlx4_mr_release_range);
int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
u64 iova, u64 size, u32 access, int npages,
int page_shift, struct mlx4_mr *mr)
{
@ -297,65 +316,159 @@ int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
mr->size = size;
mr->pd = pd;
mr->access = access;
mr->enabled = 0;
mr->enabled = MLX4_MR_DISABLED;
mr->key = hw_index_to_key(mridx);
return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
}
EXPORT_SYMBOL_GPL(mlx4_mr_alloc_reserved);
static int mlx4_WRITE_MTT(struct mlx4_dev *dev,
struct mlx4_cmd_mailbox *mailbox,
int num_entries)
{
return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
int __mlx4_mr_reserve(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
}
static int mlx4_mr_reserve(struct mlx4_dev *dev)
{
u64 out_param;
if (mlx4_is_mfunc(dev)) {
if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE,
MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
return -1;
return get_param_l(&out_param);
}
return __mlx4_mr_reserve(dev);
}
void __mlx4_mr_release(struct mlx4_dev *dev, u32 index)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
}
static void mlx4_mr_release(struct mlx4_dev *dev, u32 index)
{
u64 in_param = 0;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, index);
if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE,
MLX4_CMD_FREE_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
mlx4_warn(dev, "Failed to release mr index:%d\n",
index);
return;
}
__mlx4_mr_release(dev, index);
}
int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
return mlx4_table_get(dev, &mr_table->dmpt_table, index);
}
static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index)
{
u64 param = 0;
if (mlx4_is_mfunc(dev)) {
set_param_l(&param, index);
return mlx4_cmd_imm(dev, param, &param, RES_MPT, RES_OP_MAP_ICM,
MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
}
return __mlx4_mr_alloc_icm(dev, index);
}
void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
mlx4_table_put(dev, &mr_table->dmpt_table, index);
}
static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index)
{
u64 in_param = 0;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, index);
if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM,
MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED))
mlx4_warn(dev, "Failed to free icm of mr index:%d\n",
index);
return;
}
return __mlx4_mr_free_icm(dev, index);
}
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u32 index;
int err;
index = mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
index = mlx4_mr_reserve(dev);
if (index == -1)
return -ENOMEM;
err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
access, npages, page_shift, mr);
if (err)
mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index);
mlx4_mr_release(dev, index);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_alloc);
void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
int err;
if (mr->enabled) {
if (mr->enabled == MLX4_MR_EN_HW) {
err = mlx4_HW2SW_MPT(dev, NULL,
key_to_hw_index(mr->key) &
(dev->caps.num_mpts - 1));
if (err)
mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);
}
mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err);
mr->enabled = MLX4_MR_EN_SW;
}
mlx4_mtt_cleanup(dev, &mr->mtt);
}
EXPORT_SYMBOL_GPL(mlx4_mr_free_reserved);
void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
mlx4_mr_free_reserved(dev, mr);
mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, key_to_hw_index(mr->key));
if (mr->enabled)
mlx4_mr_free_icm(dev, key_to_hw_index(mr->key));
mlx4_mr_release(dev, key_to_hw_index(mr->key));
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_mpt_entry *mpt_entry;
int err;
err = mlx4_table_get(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
err = mlx4_mr_alloc_icm(dev, key_to_hw_index(mr->key));
if (err)
return err;
@ -380,9 +493,10 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
if (mr->mtt.order < 0) {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
mpt_entry->mtt_seg = 0;
mpt_entry->mtt_addr = 0;
} else {
mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
&mr->mtt));
}
if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
@ -390,8 +504,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
MLX4_MPT_PD_FLAG_RAE);
mpt_entry->mtt_sz = cpu_to_be32((1 << mr->mtt.order) *
dev->caps.mtts_per_seg);
mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
} else {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
}
@ -402,8 +515,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
goto err_cmd;
}
mr->enabled = 1;
mr->enabled = MLX4_MR_EN_HW;
mlx4_free_cmd_mailbox(dev, mailbox);
@ -413,7 +525,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
mlx4_free_cmd_mailbox(dev, mailbox);
err_table:
mlx4_table_put(dev, &mr_table->dmpt_table, key_to_hw_index(mr->key));
mlx4_mr_free_icm(dev, key_to_hw_index(mr->key));
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_enable);
@ -425,50 +537,94 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
__be64 *mtts;
dma_addr_t dma_handle;
int i;
int s = start_index * sizeof (u64);
/* All MTTs must fit in the same page */
if (start_index / (PAGE_SIZE / sizeof (u64)) !=
(start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
return -EINVAL;
mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
start_index, &dma_handle);
if (start_index & (dev->caps.mtts_per_seg - 1))
return -EINVAL;
mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
s / dev->caps.mtt_entry_sz, &dma_handle);
if (!mtts)
return -ENOMEM;
dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
npages * sizeof (u64), DMA_TO_DEVICE);
for (i = 0; i < npages; ++i)
mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
npages * sizeof (u64), DMA_TO_DEVICE);
return 0;
}
int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list)
{
int err = 0;
int chunk;
int mtts_per_page;
int max_mtts_first_page;
/* compute how may mtts fit in the first page */
mtts_per_page = PAGE_SIZE / sizeof(u64);
max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
% mtts_per_page;
chunk = min_t(int, max_mtts_first_page, npages);
while (npages > 0) {
err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
if (err)
return err;
npages -= chunk;
start_index += chunk;
page_list += chunk;
chunk = min_t(int, mtts_per_page, npages);
}
return err;
}
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
int start_index, int npages, u64 *page_list)
{
struct mlx4_cmd_mailbox *mailbox = NULL;
__be64 *inbox = NULL;
int chunk;
int err;
int err = 0;
int i;
if (mtt->order < 0)
return -EINVAL;
while (npages > 0) {
chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
if (err)
return err;
if (mlx4_is_mfunc(dev)) {
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
inbox = mailbox->buf;
npages -= chunk;
start_index += chunk;
page_list += chunk;
while (npages > 0) {
chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
npages);
inbox[0] = cpu_to_be64(mtt->offset + start_index);
inbox[1] = 0;
for (i = 0; i < chunk; ++i)
inbox[i + 2] = cpu_to_be64(page_list[i] |
MLX4_MTT_FLAG_PRESENT);
err = mlx4_WRITE_MTT(dev, mailbox, chunk);
if (err) {
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
npages -= chunk;
start_index += chunk;
page_list += chunk;
}
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
return 0;
return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list);
}
EXPORT_SYMBOL_GPL(mlx4_write_mtt);
@ -484,7 +640,7 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
return -ENOMEM;
for (i = 0; i < buf->npages; ++i)
if (buf->direct.map)
if (buf->nbufs == 1)
page_list[i] = buf->direct.map + (i << buf->page_shift);
else
page_list[i] = buf->page_list[i].map;
@ -498,9 +654,15 @@ EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);
int mlx4_init_mr_table(struct mlx4_dev *dev)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_mr_table *mr_table = &priv->mr_table;
int err;
/* Nothing to do for slaves - all MR handling is forwarded
* to the master */
if (mlx4_is_slave(dev))
return 0;
if (!is_power_of_2(dev->caps.num_mpts))
return -EINVAL;
@ -510,13 +672,17 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
return err;
err = mlx4_buddy_init(&mr_table->mtt_buddy,
ilog2(dev->caps.num_mtt_segs));
ilog2((u32)dev->caps.num_mtts /
(1 << log_mtts_per_seg)));
if (err)
goto err_buddy;
if (dev->caps.reserved_mtts) {
if (mlx4_alloc_mtt_range(dev, fls(dev->caps.reserved_mtts - 1)) == -1) {
mlx4_warn(dev, "MTT table of order %d is too small.\n",
priv->reserved_mtts =
mlx4_alloc_mtt_range(dev,
fls(dev->caps.reserved_mtts - 1));
if (priv->reserved_mtts < 0) {
mlx4_warn(dev, "MTT table of order %u is too small.\n",
mr_table->mtt_buddy.max_order);
err = -ENOMEM;
goto err_reserve_mtts;
@ -536,8 +702,14 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
{
struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_mr_table *mr_table = &priv->mr_table;
if (mlx4_is_slave(dev))
return;
if (priv->reserved_mtts >= 0)
mlx4_free_mtt_range(dev, priv->reserved_mtts,
fls(dev->caps.reserved_mtts - 1));
mlx4_buddy_cleanup(&mr_table->mtt_buddy);
mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
}
@ -569,9 +741,8 @@ static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
return 0;
}
int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u64 *page_list, int npages, u64 iova, u32 fbo,
u32 len, u32 *lkey, u32 *rkey, int same_key)
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
int npages, u64 iova, u32 *lkey, u32 *rkey)
{
u32 key;
int i, err;
@ -583,8 +754,7 @@ int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
++fmr->maps;
key = key_to_hw_index(fmr->mr.key);
if (!same_key)
key += dev->caps.num_mpts;
key += dev->caps.num_mpts;
*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
@ -592,18 +762,19 @@ int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
/* Make sure MPT status is visible before writing MTT entries */
wmb();
dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
npages * sizeof(u64), DMA_TO_DEVICE);
for (i = 0; i < npages; ++i)
fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
dma_sync_single(&dev->pdev->dev, fmr->dma_handle,
npages * sizeof(u64), DMA_TO_DEVICE);
dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
npages * sizeof(u64), DMA_TO_DEVICE);
fmr->mpt->key = cpu_to_be32(key);
fmr->mpt->lkey = cpu_to_be32(key);
fmr->mpt->length = cpu_to_be64(len);
fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
fmr->mpt->start = cpu_to_be64(iova);
fmr->mpt->first_byte_offset = cpu_to_be32(fbo & 0x001fffff);
fmr->mpt->flags2 = (fbo ? MLX4_MPT_FLAG2_FBO_EN : 0);
/* Make MTT entries are visible before setting MPT status */
wmb();
@ -615,25 +786,17 @@ int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr_fbo);
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
int npages, u64 iova, u32 *lkey, u32 *rkey)
{
u32 len = npages * (1ull << fmr->page_shift);
return mlx4_map_phys_fmr_fbo(dev, fmr, page_list, npages, iova, 0,
len, lkey, rkey, 0);
}
EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u64 mtt_seg;
int err = -ENOMEM;
if (max_maps > dev->caps.max_fmr_maps)
return -EINVAL;
if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
return -EINVAL;
@ -651,11 +814,10 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
if (err)
return err;
mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
fmr->mr.mtt.first_seg,
fmr->mr.mtt.offset,
&fmr->dma_handle);
if (!fmr->mtts) {
err = -ENOMEM;
goto err_free;
@ -669,49 +831,6 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
}
EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);
int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx,
u32 pd, u32 access, int max_pages,
int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u64 mtt_seg;
int err = -ENOMEM;
if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
return -EINVAL;
/* All MTTs must fit in the same page */
if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
return -EINVAL;
fmr->page_shift = page_shift;
fmr->max_pages = max_pages;
fmr->max_maps = max_maps;
fmr->maps = 0;
err = mlx4_mr_alloc_reserved(dev, mridx, pd, 0, 0, access, max_pages,
page_shift, &fmr->mr);
if (err)
return err;
mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
fmr->mr.mtt.first_seg,
&fmr->dma_handle);
if (!fmr->mtts) {
err = -ENOMEM;
goto err_free;
}
return 0;
err_free:
mlx4_mr_free_reserved(dev, &fmr->mr);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_alloc_reserved);
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@ -733,12 +852,30 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u32 *lkey, u32 *rkey)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
if (!fmr->maps)
return;
fmr->maps = 0;
*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
err = PTR_ERR(mailbox);
mlx4_warn(dev, "mlx4_alloc_cmd_mailbox failed (%d)\n", err);
return;
}
err = mlx4_HW2SW_MPT(dev, NULL,
key_to_hw_index(fmr->mr.key) &
(dev->caps.num_mpts - 1));
mlx4_free_cmd_mailbox(dev, mailbox);
if (err) {
mlx4_warn(dev, "mlx4_HW2SW_MPT failed (%d)\n", err);
return;
}
fmr->mr.enabled = MLX4_MR_EN_SW;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);
@ -747,27 +884,16 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
if (fmr->maps)
return -EBUSY;
fmr->mr.enabled = 0;
mlx4_mr_free(dev, &fmr->mr);
fmr->mr.enabled = MLX4_MR_DISABLED;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_free);
int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
if (fmr->maps)
return -EBUSY;
fmr->mr.enabled = 0;
mlx4_mr_free_reserved(dev, &fmr->mr);
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_free_reserved);
int mlx4_SYNC_TPT(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000,
MLX4_CMD_NATIVE);
}
EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);

View File

@ -62,12 +62,66 @@ void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn)
}
EXPORT_SYMBOL_GPL(mlx4_pd_free);
int __mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
*xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap);
if (*xrcdn == -1)
return -ENOMEM;
return 0;
}
int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
{
u64 out_param;
int err;
if (mlx4_is_mfunc(dev)) {
err = mlx4_cmd_imm(dev, 0, &out_param,
RES_XRCD, RES_OP_RESERVE,
MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
return err;
*xrcdn = get_param_l(&out_param);
return 0;
}
return __mlx4_xrcd_alloc(dev, xrcdn);
}
EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
{
mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn);
}
void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
{
u64 in_param = 0;
int err;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, xrcdn);
err = mlx4_cmd(dev, in_param, RES_XRCD,
RES_OP_RESERVE, MLX4_CMD_FREE_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
mlx4_warn(dev, "Failed to release xrcdn %d\n", xrcdn);
} else
__mlx4_xrcd_free(dev, xrcdn);
}
EXPORT_SYMBOL_GPL(mlx4_xrcd_free);
int mlx4_init_pd_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
(1 << 24) - 1, dev->caps.reserved_pds, 0);
(1 << NOT_MASKED_PD_BITS) - 1,
dev->caps.reserved_pds, 0);
}
void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
@ -75,16 +129,34 @@ void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
}
int mlx4_init_xrcd_table(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16),
(1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0);
}
void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
{
mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap);
}
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
{
int offset;
uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
if (uar->index == -1)
return -ENOMEM;
uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
if (mlx4_is_slave(dev))
offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) /
dev->caps.uar_page_size);
else
offset = uar->index;
uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset;
uar->map = NULL;
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
@ -95,7 +167,8 @@ void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar)
}
EXPORT_SYMBOL_GPL(mlx4_uar_free);
int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
#ifndef CONFIG_PPC
int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_uar *uar;
@ -113,10 +186,13 @@ int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
err = -ENOMEM;
goto out;
}
uar = kmalloc(sizeof *uar, GFP_KERNEL);
uar = kmalloc_node(sizeof *uar, GFP_KERNEL, node);
if (!uar) {
err = -ENOMEM;
goto out;
uar = kmalloc(sizeof *uar, GFP_KERNEL);
if (!uar) {
err = -ENOMEM;
goto out;
}
}
err = mlx4_uar_alloc(dev, uar);
if (err)
@ -191,6 +267,21 @@ void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
}
EXPORT_SYMBOL_GPL(mlx4_bf_free);
#else
int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
{
memset(bf, 0, sizeof *bf);
return -ENOSYS;
}
EXPORT_SYMBOL_GPL(mlx4_bf_alloc);
void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf)
{
return;
}
EXPORT_SYMBOL_GPL(mlx4_bf_free);
#endif
int mlx4_init_uar_table(struct mlx4_dev *dev)
{
if (dev->caps.num_uars <= 128) {
@ -202,7 +293,7 @@ int mlx4_init_uar_table(struct mlx4_dev *dev)
return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
dev->caps.num_uars, dev->caps.num_uars - 1,
max(128, dev->caps.reserved_uars), 0);
dev->caps.reserved_uars, 0);
}
void mlx4_cleanup_uar_table(struct mlx4_dev *dev)

View File

@ -34,19 +34,26 @@
#include <linux/if_ether.h>
#include <linux/mlx4/cmd.h>
#include <linux/moduleparam.h>
#include "mlx4.h"
int mlx4_ib_set_4k_mtu = 0;
module_param_named(set_4k_mtu, mlx4_ib_set_4k_mtu, int, 0444);
MODULE_PARM_DESC(set_4k_mtu, "attempt to set 4K MTU to all ConnectX ports");
int mlx4_set_4k_mtu = -1;
module_param_named(set_4k_mtu, mlx4_set_4k_mtu, int, 0444);
MODULE_PARM_DESC(set_4k_mtu,
"(Obsolete) attempt to set 4K MTU to all ConnectX ports");
#define MLX4_MAC_VALID (1ull << 63)
#define MLX4_MAC_MASK 0xffffffffffffULL
#define MLX4_VLAN_VALID (1u << 31)
#define MLX4_VLAN_MASK 0xfff
#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL
#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL
#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL
#define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL
#define MLX4_STATS_IF_RX_ERRORS_COUNTERS_MASK 0x8010ULL
void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
{
int i;
@ -69,10 +76,36 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
table->entries[i] = 0;
table->refs[i] = 0;
}
table->max = 1 << dev->caps.log_num_vlans;
table->max = (1 << dev->caps.log_num_vlans) - MLX4_VLAN_REGULAR;
table->total = 0;
}
static int validate_index(struct mlx4_dev *dev,
struct mlx4_mac_table *table, int index)
{
int err = 0;
if (index < 0 || index >= table->max || !table->entries[index]) {
mlx4_warn(dev, "No valid Mac entry for the given index\n");
err = -EINVAL;
}
return err;
}
static int find_index(struct mlx4_dev *dev,
struct mlx4_mac_table *table, u64 mac)
{
int i;
for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
if ((mac & MLX4_MAC_MASK) ==
(MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
return i;
}
/* Mac not found */
return -EINVAL;
}
static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
__be64 *entries)
{
@ -87,40 +120,39 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
struct mlx4_mac_table *table = &info->mac_table;
int i, err = 0;
int free = -1;
mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n",
(unsigned long long) mac, port);
mutex_lock(&table->mutex);
for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
if (free < 0 && !table->refs[i]) {
for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
if (free < 0 && !table->entries[i]) {
free = i;
continue;
}
if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
/* MAC already registered, increase refernce count */
*index = i;
/* MAC already registered, Must not have duplicates */
err = i;
++table->refs[i];
goto out;
}
}
if (free < 0) {
err = -ENOMEM;
goto out;
}
mlx4_dbg(dev, "Free MAC index is %d\n", free);
if (table->total == table->max) {
@ -130,47 +162,128 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index)
}
/* Register new MAC */
table->refs[free] = 1;
table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
err = mlx4_set_port_mac_table(dev, port, table->entries);
if (unlikely(err)) {
mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) mac);
table->refs[free] = 0;
mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
(unsigned long long) mac);
table->entries[free] = 0;
goto out;
}
table->refs[free] = 1;
*index = free;
err = free;
++table->total;
out:
mutex_unlock(&table->mutex);
return err;
}
EXPORT_SYMBOL_GPL(__mlx4_register_mac);
int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
u64 out_param = 0;
int err;
if (mlx4_is_mfunc(dev)) {
err = mlx4_cmd_imm(dev, mac, &out_param,
((u32) port) << 8 | (u32) RES_MAC,
RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
return err;
return get_param_l(&out_param);
}
return __mlx4_register_mac(dev, port, mac);
}
EXPORT_SYMBOL_GPL(mlx4_register_mac);
void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index)
int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port)
{
struct mlx4_mac_table *table = &mlx4_priv(dev)->port[port].mac_table;
return dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
(port - 1) * (1 << dev->caps.log_num_macs);
}
EXPORT_SYMBOL_GPL(mlx4_get_base_qpn);
void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
struct mlx4_port_info *info;
struct mlx4_mac_table *table;
int index;
if (port < 1 || port > dev->caps.num_ports) {
mlx4_warn(dev, "invalid port number (%d), aborting...\n", port);
return;
}
info = &mlx4_priv(dev)->port[port];
table = &info->mac_table;
mutex_lock(&table->mutex);
if (!table->refs[index]) {
mlx4_warn(dev, "No MAC entry for index %d\n", index);
index = find_index(dev, table, mac);
if (validate_index(dev, table, index))
goto out;
}
if (--table->refs[index]) {
mlx4_warn(dev, "Have more references for index %d,"
"no need to modify MAC table\n", index);
mlx4_dbg(dev, "Have more references for index %d,"
"no need to modify mac table\n", index);
goto out;
}
table->entries[index] = 0;
mlx4_set_port_mac_table(dev, port, table->entries);
--table->total;
out:
mutex_unlock(&table->mutex);
}
EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
{
u64 out_param = 0;
if (mlx4_is_mfunc(dev)) {
(void) mlx4_cmd_imm(dev, mac, &out_param,
((u32) port) << 8 | (u32) RES_MAC,
RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
return;
}
__mlx4_unregister_mac(dev, port, mac);
return;
}
EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
{
struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
struct mlx4_mac_table *table = &info->mac_table;
int index = qpn - info->base_qpn;
int err = 0;
/* CX1 doesn't support multi-functions */
mutex_lock(&table->mutex);
err = validate_index(dev, table, index);
if (err)
goto out;
table->entries[index] = cpu_to_be64(new_mac | MLX4_MAC_VALID);
err = mlx4_set_port_mac_table(dev, port, table->entries);
if (unlikely(err)) {
mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
(unsigned long long) new_mac);
table->entries[index] = 0;
}
out:
mutex_unlock(&table->mutex);
return err;
}
EXPORT_SYMBOL_GPL(__mlx4_replace_mac);
static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
__be32 *entries)
{
@ -185,7 +298,7 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
@ -201,7 +314,7 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx)
if (table->refs[i] &&
(vid == (MLX4_VLAN_MASK &
be32_to_cpu(table->entries[i])))) {
/* Vlan already registered, increase refernce count */
/* VLAN already registered, increase reference count */
*idx = i;
return 0;
}
@ -211,13 +324,21 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx)
}
EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
int *index)
{
struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
int i, err = 0;
int free = -1;
mutex_lock(&table->mutex);
if (table->total == table->max) {
/* No free vlan entries */
err = -ENOSPC;
goto out;
}
for (i = MLX4_VLAN_REGULAR; i < MLX4_MAX_VLAN_NUM; i++) {
if (free < 0 && (table->refs[i] == 0)) {
free = i;
@ -227,7 +348,7 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
if (table->refs[i] &&
(vlan == (MLX4_VLAN_MASK &
be32_to_cpu(table->entries[i])))) {
/* Vlan already registered, increase refernce count */
/* Vlan already registered, increase references count */
*index = i;
++table->refs[i];
goto out;
@ -239,13 +360,7 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
goto out;
}
if (table->total == table->max) {
/* No free vlan entries */
err = -ENOSPC;
goto out;
}
/* Register new MAC */
/* Register new VLAN */
table->refs[free] = 1;
table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
@ -263,25 +378,49 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
mutex_unlock(&table->mutex);
return err;
}
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
{
u64 out_param = 0;
int err;
if (vlan > 4095)
return -EINVAL;
if (mlx4_is_mfunc(dev)) {
err = mlx4_cmd_imm(dev, vlan, &out_param,
((u32) port) << 8 | (u32) RES_VLAN,
RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (!err)
*index = get_param_l(&out_param);
return err;
}
return __mlx4_register_vlan(dev, port, vlan, index);
}
EXPORT_SYMBOL_GPL(mlx4_register_vlan);
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
{
struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
int index;
mutex_lock(&table->mutex);
if (mlx4_find_cached_vlan(dev, port, vlan, &index)) {
mlx4_warn(dev, "vlan 0x%x is not in the vlan table\n", vlan);
goto out;
}
if (index < MLX4_VLAN_REGULAR) {
mlx4_warn(dev, "Trying to free special vlan index %d\n", index);
return;
}
mutex_lock(&table->mutex);
if (!table->refs[index]) {
mlx4_warn(dev, "No vlan entry for index %d\n", index);
goto out;
}
if (--table->refs[index]) {
mlx4_dbg(dev, "Have more references for index %d,"
"no need to modify vlan table\n", index);
mlx4_dbg(dev, "Have %d more references for index %d, "
"no need to modify vlan table\n", table->refs[index],
index);
goto out;
}
table->entries[index] = 0;
@ -290,6 +429,21 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
out:
mutex_unlock(&table->mutex);
}
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
{
u64 out_param = 0;
if (mlx4_is_mfunc(dev)) {
(void) mlx4_cmd_imm(dev, vlan, &out_param,
((u32) port) << 8 | (u32) RES_VLAN,
RES_OP_RESERVE_AND_MAP,
MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
return;
}
__mlx4_unregister_vlan(dev, port, vlan);
}
EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
@ -320,20 +474,275 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
MLX4_CMD_NATIVE);
if (!err)
*caps = *(__be32 *) (outbuf + 84);
mlx4_free_cmd_mailbox(dev, inmailbox);
mlx4_free_cmd_mailbox(dev, outmailbox);
return err;
}
static struct mlx4_roce_gid_entry zgid_entry;
int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave)
{
if (slave == 0)
return MLX4_ROCE_PF_GIDS;
if (slave <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % dev->num_vfs))
return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs) + 1;
return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / dev->num_vfs;
}
int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave)
{
int gids;
int vfs;
gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
vfs = dev->num_vfs;
if (slave == 0)
return 0;
if (slave <= gids % vfs)
return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave - 1);
return MLX4_ROCE_PF_GIDS + (gids % vfs) + ((gids / vfs) * (slave - 1));
}
static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
u8 op_mod, struct mlx4_cmd_mailbox *inbox)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_port_info *port_info;
struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
struct mlx4_slave_state *slave_st = &master->slave_state[slave];
struct mlx4_set_port_rqp_calc_context *qpn_context;
struct mlx4_set_port_general_context *gen_context;
struct mlx4_roce_gid_entry *gid_entry_tbl, *gid_entry_mbox, *gid_entry_mb1;
int reset_qkey_viols;
int port;
int is_eth;
int num_gids;
int base;
u32 in_modifier;
u32 promisc;
u16 mtu, prev_mtu;
int err;
int i, j;
int offset;
__be32 agg_cap_mask;
__be32 slave_cap_mask;
__be32 new_cap_mask;
port = in_mod & 0xff;
in_modifier = in_mod >> 8;
is_eth = op_mod;
port_info = &priv->port[port];
/* Slaves cannot perform SET_PORT operations except changing MTU */
if (is_eth) {
if (slave != dev->caps.function &&
in_modifier != MLX4_SET_PORT_GENERAL &&
in_modifier != MLX4_SET_PORT_GID_TABLE) {
mlx4_warn(dev, "denying SET_PORT for slave:%d\n",
slave);
return -EINVAL;
}
switch (in_modifier) {
case MLX4_SET_PORT_RQP_CALC:
qpn_context = inbox->buf;
qpn_context->base_qpn =
cpu_to_be32(port_info->base_qpn);
qpn_context->n_mac = 0x7;
promisc = be32_to_cpu(qpn_context->promisc) >>
SET_PORT_PROMISC_SHIFT;
qpn_context->promisc = cpu_to_be32(
promisc << SET_PORT_PROMISC_SHIFT |
port_info->base_qpn);
promisc = be32_to_cpu(qpn_context->mcast) >>
SET_PORT_MC_PROMISC_SHIFT;
qpn_context->mcast = cpu_to_be32(
promisc << SET_PORT_MC_PROMISC_SHIFT |
port_info->base_qpn);
break;
case MLX4_SET_PORT_GENERAL:
gen_context = inbox->buf;
/* Mtu is configured as the max MTU among all the
* the functions on the port. */
mtu = be16_to_cpu(gen_context->mtu);
mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port]);
prev_mtu = slave_st->mtu[port];
slave_st->mtu[port] = mtu;
if (mtu > master->max_mtu[port])
master->max_mtu[port] = mtu;
if (mtu < prev_mtu && prev_mtu ==
master->max_mtu[port]) {
slave_st->mtu[port] = mtu;
master->max_mtu[port] = mtu;
for (i = 0; i < dev->num_slaves; i++) {
master->max_mtu[port] =
max(master->max_mtu[port],
master->slave_state[i].mtu[port]);
}
}
gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
break;
case MLX4_SET_PORT_GID_TABLE:
/* change to MULTIPLE entries: number of guest's gids
* need a FOR-loop here over number of gids the guest has.
* 1. Check no duplicates in gids passed by slave
*/
num_gids = mlx4_get_slave_num_gids(dev, slave);
base = mlx4_get_base_gid_ix(dev, slave);
gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
for (i = 0; i < num_gids; gid_entry_mbox++, i++) {
if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
sizeof(zgid_entry)))
continue;
gid_entry_mb1 = gid_entry_mbox + 1;
for (j = i + 1; j < num_gids; gid_entry_mb1++, j++) {
if (!memcmp(gid_entry_mb1->raw,
zgid_entry.raw, sizeof(zgid_entry)))
continue;
if (!memcmp(gid_entry_mb1->raw, gid_entry_mbox->raw,
sizeof(gid_entry_mbox->raw))) {
/* found duplicate */
return -EINVAL;
}
}
}
/* 2. Check that do not have duplicates in OTHER
* entries in the port GID table
*/
for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
if (i >= base && i < base + num_gids)
continue; /* don't compare to slave's current gids */
gid_entry_tbl = &priv->roce_gids[port - 1][i];
if (!memcmp(gid_entry_tbl->raw, zgid_entry.raw, sizeof(zgid_entry)))
continue;
gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
for (j = 0; j < num_gids; gid_entry_mbox++, j++) {
if (!memcmp(gid_entry_mbox->raw, zgid_entry.raw,
sizeof(zgid_entry)))
continue;
if (!memcmp(gid_entry_mbox->raw, gid_entry_tbl->raw,
sizeof(gid_entry_tbl->raw))) {
/* found duplicate */
mlx4_warn(dev, "requested gid entry for slave:%d "
"is a duplicate of gid at index %d\n",
slave, i);
return -EINVAL;
}
}
}
/* insert slave GIDs with memcpy, starting at slave's base index */
gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
for (i = 0, offset = base; i < num_gids; gid_entry_mbox++, offset++, i++)
memcpy(priv->roce_gids[port - 1][offset].raw, gid_entry_mbox->raw, 16);
/* Now, copy roce port gids table to current mailbox for passing to FW */
gid_entry_mbox = (struct mlx4_roce_gid_entry *) (inbox->buf);
for (i = 0; i < MLX4_ROCE_MAX_GIDS; gid_entry_mbox++, i++)
memcpy(gid_entry_mbox->raw, priv->roce_gids[port - 1][i].raw, 16);
break;
}
return mlx4_cmd(dev, inbox->dma, in_mod, op_mod,
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_NATIVE);
}
/* For IB, we only consider:
* - The capability mask, which is set to the aggregate of all
* slave function capabilities
* - The QKey violatin counter - reset according to each request.
*/
if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
reset_qkey_viols = (*(u8 *) inbox->buf) & 0x40;
new_cap_mask = ((__be32 *) inbox->buf)[2];
} else {
reset_qkey_viols = ((u8 *) inbox->buf)[3] & 0x1;
new_cap_mask = ((__be32 *) inbox->buf)[1];
}
/* slave may not set the IS_SM capability for the port */
if (slave != mlx4_master_func_num(dev) &&
(be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM))
return -EINVAL;
/* No DEV_MGMT in multifunc mode */
if (mlx4_is_mfunc(dev) &&
(be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP))
return -EINVAL;
agg_cap_mask = 0;
slave_cap_mask =
priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = new_cap_mask;
for (i = 0; i < dev->num_slaves; i++)
agg_cap_mask |=
priv->mfunc.master.slave_state[i].ib_cap_mask[port];
/* only clear mailbox for guests. Master may be setting
* MTU or PKEY table size
*/
if (slave != dev->caps.function)
memset(inbox->buf, 0, 256);
if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
*(u8 *) inbox->buf |= !!reset_qkey_viols << 6;
((__be32 *) inbox->buf)[2] = agg_cap_mask;
} else {
((u8 *) inbox->buf)[3] |= !!reset_qkey_viols;
((__be32 *) inbox->buf)[1] = agg_cap_mask;
}
err = mlx4_cmd(dev, inbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
if (err)
priv->mfunc.master.slave_state[slave].ib_cap_mask[port] =
slave_cap_mask;
return err;
}
int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
return mlx4_common_set_port(dev, slave, vhcr->in_modifier,
vhcr->op_modifier, inbox);
}
/* bit locations for set port command with zero op modifier */
enum {
MLX4_SET_PORT_VL_CAP = 4, /* bits 7:4 */
MLX4_SET_PORT_MTU_CAP = 12, /* bits 15:12 */
MLX4_CHANGE_PORT_PKEY_TBL_SZ = 20,
MLX4_CHANGE_PORT_VL_CAP = 21,
MLX4_CHANGE_PORT_MTU_CAP = 22,
};
#define CX3_PPF_DEV_ID 0x1003
static int vl_cap_start(struct mlx4_dev *dev)
{
/* for non CX3 devices, start with 4 VLs to avoid errors in syslog */
if (dev->pdev->device != CX3_PPF_DEV_ID)
return 4;
return 8;
}
int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
int err, vl_cap, pkey_tbl_flag = 0;
u32 in_mod;
if (dev->caps.port_type[port] != MLX4_PORT_TYPE_IB)
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_NONE)
return 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
@ -342,13 +751,295 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
memset(mailbox->buf, 0, 256);
if (mlx4_ib_set_4k_mtu)
((__be32 *) mailbox->buf)[0] |= cpu_to_be32((1 << 22) | (1 << 21) | (5 << 12) | (2 << 4));
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1,
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
} else {
((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B);
if (pkey_tbl_sz >= 0 && mlx4_is_master(dev)) {
pkey_tbl_flag = 1;
((__be16 *) mailbox->buf)[20] = cpu_to_be16(pkey_tbl_sz);
}
/* IB VL CAP enum isn't used by the firmware, just numerical values */
for (vl_cap = vl_cap_start(dev); vl_cap >= 1; vl_cap >>= 1) {
((__be32 *) mailbox->buf)[0] = cpu_to_be32(
(1 << MLX4_CHANGE_PORT_MTU_CAP) |
(1 << MLX4_CHANGE_PORT_VL_CAP) |
(pkey_tbl_flag << MLX4_CHANGE_PORT_PKEY_TBL_SZ) |
(dev->caps.port_ib_mtu[port] << MLX4_SET_PORT_MTU_CAP) |
(vl_cap << MLX4_SET_PORT_VL_CAP));
err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
if (err != -ENOMEM)
break;
}
}
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_set_port_general_context *context;
int err;
u32 in_mod;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
context = mailbox->buf;
memset(context, 0, sizeof *context);
context->flags = SET_PORT_GEN_ALL_VALID;
context->mtu = cpu_to_be16(mtu);
context->pptx = (pptx * (!pfctx)) << 7;
context->pfctx = pfctx;
context->pprx = (pprx * (!pfcrx)) << 7;
context->pfcrx = pfcrx;
in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL(mlx4_SET_PORT_general);
int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
u8 promisc)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_set_port_rqp_calc_context *context;
int err;
u32 in_mod;
u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
MCAST_DIRECT : MCAST_DEFAULT;
/*
if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
return 0;
*/
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
context = mailbox->buf;
memset(context, 0, sizeof *context);
context->base_qpn = cpu_to_be32(base_qpn);
/*
* This assignment breaks vlan support - I don't know why. Probablya an A0 issue - shahar Klein
* context->n_mac = dev->caps.log_num_macs;
*/
context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
base_qpn);
context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
base_qpn);
context->intra_no_vlan = 0;
context->no_vlan = MLX4_NO_VLAN_IDX;
context->intra_vlan_miss = 0;
context->vlan_miss = MLX4_VLAN_MISS_IDX;
in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_set_port_prio2tc_context *context;
int err;
u32 in_mod;
int i;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
context = mailbox->buf;
memset(context, 0, sizeof *context);
for (i = 0; i < MLX4_NUM_UP; i += 2)
context->prio2tc[i >> 1] = prio2tc[i] << 4 | prio2tc[i + 1];
in_mod = MLX4_SET_PORT_PRIO2TC << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL(mlx4_SET_PORT_PRIO2TC);
int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
u8 *pg, u16 *ratelimit)
{
struct mlx4_cmd_mailbox *mailbox;
struct mlx4_set_port_scheduler_context *context;
int err;
u32 in_mod;
int i;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
context = mailbox->buf;
memset(context, 0, sizeof *context);
for (i = 0; i < MLX4_NUM_TC; i++) {
struct mlx4_port_scheduler_tc_cfg_be *tc = &context->tc[i];
u16 r;
if (ratelimit && ratelimit[i]) {
if (ratelimit[i] <= MLX4_MAX_100M_UNITS_VAL) {
r = ratelimit[i];
tc->max_bw_units =
htons(MLX4_RATELIMIT_100M_UNITS);
} else {
r = ratelimit[i]/10;
tc->max_bw_units =
htons(MLX4_RATELIMIT_1G_UNITS);
}
tc->max_bw_value = htons(r);
} else {
tc->max_bw_value = htons(MLX4_RATELIMIT_DEFAULT);
tc->max_bw_units = htons(MLX4_RATELIMIT_1G_UNITS);
}
tc->pg = htons(pg[i]);
tc->bw_precentage = htons(tc_tx_bw[i]);
}
in_mod = MLX4_SET_PORT_SCHEDULER << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(dev, mailbox);
return err;
}
EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER);
int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
int err = 0;
return err;
}
int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
u64 mac, u64 clear, u8 mode)
{
return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
}
EXPORT_SYMBOL(mlx4_SET_MCAST_FLTR);
int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
int err = 0;
return err;
}
int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave,
u32 in_mod, struct mlx4_cmd_mailbox *outbox)
{
return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0,
MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_NATIVE);
}
int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
if (slave != dev->caps.function)
return 0;
return mlx4_common_dump_eth_stats(dev, slave,
vhcr->in_modifier, outbox);
}
void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap)
{
if (!mlx4_is_mfunc(dev)) {
*stats_bitmap = 0;
return;
}
*stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK |
MLX4_STATS_TRAFFIC_DROPS_MASK |
MLX4_STATS_PORT_COUNTERS_MASK |
MLX4_STATS_IF_RX_ERRORS_COUNTERS_MASK);
if (mlx4_is_master(dev))
*stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK;
}
EXPORT_SYMBOL(mlx4_set_stats_bitmap);
int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int i, found_ix = -1;
int vf_gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
if (!mlx4_is_mfunc(dev))
return -EINVAL;
for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
if (!memcmp(priv->roce_gids[port - 1][i].raw, gid, 16)) {
found_ix = i;
break;
}
}
if (found_ix >= 0) {
if (found_ix < MLX4_ROCE_PF_GIDS)
*slave_id = 0;
else if (found_ix < MLX4_ROCE_PF_GIDS + (vf_gids % dev->num_vfs) *
(vf_gids / dev->num_vfs + 1))
*slave_id = ((found_ix - MLX4_ROCE_PF_GIDS) /
(vf_gids / dev->num_vfs + 1)) + 1;
else
*slave_id =
((found_ix - MLX4_ROCE_PF_GIDS -
((vf_gids % dev->num_vfs) * ((vf_gids / dev->num_vfs + 1)))) /
(vf_gids / dev->num_vfs)) + vf_gids % dev->num_vfs + 1;
}
return (found_ix >= 0) ? 0 : -EINVAL;
}
EXPORT_SYMBOL(mlx4_get_slave_from_roce_gid);
int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid)
{
struct mlx4_priv *priv = mlx4_priv(dev);
if (!mlx4_is_master(dev))
return -EINVAL;
memcpy(gid, priv->roce_gids[port - 1][slave_id].raw, 16);
return 0;
}
EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);

View File

@ -32,7 +32,7 @@
* SOFTWARE.
*/
#include <linux/init.h>
#include <linux/slab.h>
#include "mlx4.h"
#include "fw.h"
@ -76,7 +76,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
u64 size;
u64 start;
int type;
int num;
u32 num;
int log_num;
};
@ -85,7 +85,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
struct mlx4_resource tmp;
int i, j;
profile = kzalloc(MLX4_RES_NUM * sizeof *profile, GFP_KERNEL);
profile = kcalloc(MLX4_RES_NUM, sizeof(*profile), GFP_KERNEL);
if (!profile)
return -ENOMEM;
@ -98,8 +98,8 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
profile[MLX4_RES_EQ].size = dev_cap->eqc_entry_sz;
profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz;
profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz;
profile[MLX4_RES_MTT].size = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE;
profile[MLX4_RES_MTT].size = dev_cap->mtt_entry_sz;
profile[MLX4_RES_MCG].size = mlx4_get_mgm_entry_size(dev);
profile[MLX4_RES_QP].num = request->num_qp;
profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp;
@ -107,12 +107,12 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
profile[MLX4_RES_AUXC].num = request->num_qp;
profile[MLX4_RES_SRQ].num = request->num_srq;
profile[MLX4_RES_CQ].num = request->num_cq;
profile[MLX4_RES_EQ].num = min_t(unsigned, dev_cap->max_eqs,
dev_cap->reserved_eqs +
num_possible_cpus() + 1);
profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ?
dev->phys_caps.num_phys_eqs :
min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
profile[MLX4_RES_DMPT].num = request->num_mpt;
profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS;
profile[MLX4_RES_MTT].num = request->num_mtt;
profile[MLX4_RES_MTT].num = request->num_mtt * (1 << log_mtts_per_seg);
profile[MLX4_RES_MCG].num = request->num_mcg;
for (i = 0; i < MLX4_RES_NUM; ++i) {
@ -198,9 +198,10 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
init_hca->log_num_cqs = profile[i].log_num;
break;
case MLX4_RES_EQ:
dev->caps.num_eqs = profile[i].num;
dev->caps.num_eqs = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs,
MAX_MSIX));
init_hca->eqc_base = profile[i].start;
init_hca->log_num_eqs = profile[i].log_num;
init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
break;
case MLX4_RES_DMPT:
dev->caps.num_mpts = profile[i].num;
@ -212,17 +213,24 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
init_hca->cmpt_base = profile[i].start;
break;
case MLX4_RES_MTT:
dev->caps.num_mtt_segs = profile[i].num;
dev->caps.num_mtts = profile[i].num;
priv->mr_table.mtt_base = profile[i].start;
init_hca->mtt_base = profile[i].start;
break;
case MLX4_RES_MCG:
dev->caps.num_mgms = profile[i].num >> 1;
dev->caps.num_amgms = profile[i].num >> 1;
init_hca->mc_base = profile[i].start;
init_hca->log_mc_entry_sz = ilog2(MLX4_MGM_ENTRY_SIZE);
init_hca->log_mc_entry_sz =
ilog2(mlx4_get_mgm_entry_size(dev));
init_hca->log_mc_table_sz = profile[i].log_num;
init_hca->log_mc_hash_sz = profile[i].log_num - 1;
if (dev->caps.steering_mode ==
MLX4_STEERING_MODE_DEVICE_MANAGED) {
dev->caps.num_mgms = profile[i].num;
} else {
init_hca->log_mc_hash_sz =
profile[i].log_num - 1;
dev->caps.num_mgms = profile[i].num >> 1;
dev->caps.num_amgms = profile[i].num >> 1;
}
break;
default:
break;

View File

@ -41,6 +41,12 @@
#include "mlx4.h"
#include "icm.h"
/*
* QP to support BF should have bits 6,7 cleared
*/
#define MLX4_BF_QP_SKIP_MASK 0xc0
#define MLX4_MAX_BF_QP_RANGE 0x40
void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
@ -55,7 +61,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
spin_unlock(&qp_table->lock);
if (!qp) {
mlx4_warn(dev, "Async event for bogus QP %08x\n", qpn);
mlx4_dbg(dev, "Async event for none existent QP %08x\n", qpn);
return;
}
@ -65,10 +71,25 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
complete(&qp->free);
}
int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
int sqd_event, struct mlx4_qp *qp)
/* used for INIT/CLOSE port logic */
static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
{
/* this procedure is called after we already know we are on the master */
/* qp0 is either the proxy qp0, or the real qp0 */
u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
*proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
*real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
qp->qpn <= dev->phys_caps.base_sqpn + 1;
return *real_qp0 || *proxy_qp0;
}
static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
struct mlx4_qp_context *context,
enum mlx4_qp_optpar optpar,
int sqd_event, struct mlx4_qp *qp, int native)
{
static const u16 op[MLX4_QP_NUM_STATE][MLX4_QP_NUM_STATE] = {
[MLX4_QP_STATE_RST] = {
@ -110,16 +131,31 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
}
};
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int ret = 0;
int real_qp0 = 0;
int proxy_qp0 = 0;
u8 port;
if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
!op[cur_state][new_state])
return -EINVAL;
if (op[cur_state][new_state] == MLX4_CMD_2RST_QP)
return mlx4_cmd(dev, 0, qp->qpn, 2,
MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
if (op[cur_state][new_state] == MLX4_CMD_2RST_QP) {
ret = mlx4_cmd(dev, 0, qp->qpn, 2,
MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
cur_state != MLX4_QP_STATE_RST &&
is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
port = (qp->qpn & 1) + 1;
if (proxy_qp0)
priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
else
priv->mfunc.master.qp0_state[port].qp0_active = 0;
}
return ret;
}
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
@ -138,41 +174,197 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn =
cpu_to_be32(qp->qpn);
ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31),
ret = mlx4_cmd(dev, mailbox->dma,
qp->qpn | (!!sqd_event << 31),
new_state == MLX4_QP_STATE_RST ? 2 : 0,
op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C);
op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
port = (qp->qpn & 1) + 1;
if (cur_state != MLX4_QP_STATE_ERR &&
cur_state != MLX4_QP_STATE_RST &&
new_state == MLX4_QP_STATE_ERR) {
if (proxy_qp0)
priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
else
priv->mfunc.master.qp0_state[port].qp0_active = 0;
} else if (new_state == MLX4_QP_STATE_RTR) {
if (proxy_qp0)
priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
else
priv->mfunc.master.qp0_state[port].qp0_active = 1;
}
}
mlx4_free_cmd_mailbox(dev, mailbox);
return ret;
}
int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
struct mlx4_qp_context *context,
enum mlx4_qp_optpar optpar,
int sqd_event, struct mlx4_qp *qp)
{
return __mlx4_qp_modify(dev, mtt, cur_state, new_state, context,
optpar, sqd_event, qp, 0);
}
EXPORT_SYMBOL_GPL(mlx4_qp_modify);
int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base)
int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
int *base, u8 bf_qp)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
int qpn;
qpn = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align);
if (qpn == -1)
if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp)
return -ENOMEM;
*base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
bf_qp ? MLX4_BF_QP_SKIP_MASK : 0);
if (*base == -1)
return -ENOMEM;
*base = qpn;
return 0;
}
int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
int *base, u8 bf_qp)
{
u64 in_param = 0;
u64 out_param;
int err;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, (((!!bf_qp) << 31) | (u32)cnt));
set_param_h(&in_param, align);
err = mlx4_cmd_imm(dev, in_param, &out_param,
RES_QP, RES_OP_RESERVE,
MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
return err;
*base = get_param_l(&out_param);
return 0;
}
return __mlx4_qp_reserve_range(dev, cnt, align, base, bf_qp);
}
EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range);
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
if (base_qpn < dev->caps.sqp_start + 8)
return;
if (mlx4_is_qp_reserved(dev, (u32) base_qpn))
return;
mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
}
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
{
u64 in_param = 0;
int err;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, base_qpn);
set_param_h(&in_param, cnt);
err = mlx4_cmd(dev, in_param, RES_QP, RES_OP_RESERVE,
MLX4_CMD_FREE_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err) {
mlx4_warn(dev, "Failed to release qp range"
" base:%d cnt:%d\n", base_qpn, cnt);
}
} else
__mlx4_qp_release_range(dev, base_qpn, cnt);
}
EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
int err;
err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
if (err)
goto err_put_qp;
err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
if (err)
goto err_put_auxc;
err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
if (err)
goto err_put_altc;
err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
if (err)
goto err_put_rdmarc;
return 0;
err_put_rdmarc:
mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
err_put_altc:
mlx4_table_put(dev, &qp_table->altc_table, qpn);
err_put_auxc:
mlx4_table_put(dev, &qp_table->auxc_table, qpn);
err_put_qp:
mlx4_table_put(dev, &qp_table->qp_table, qpn);
err_out:
return err;
}
static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
{
u64 param = 0;
if (mlx4_is_mfunc(dev)) {
set_param_l(&param, qpn);
return mlx4_cmd_imm(dev, param, &param, RES_QP, RES_OP_MAP_ICM,
MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
}
return __mlx4_qp_alloc_icm(dev, qpn);
}
void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_qp_table *qp_table = &priv->qp_table;
mlx4_table_put(dev, &qp_table->cmpt_table, qpn);
mlx4_table_put(dev, &qp_table->rdmarc_table, qpn);
mlx4_table_put(dev, &qp_table->altc_table, qpn);
mlx4_table_put(dev, &qp_table->auxc_table, qpn);
mlx4_table_put(dev, &qp_table->qp_table, qpn);
}
static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
{
u64 in_param = 0;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, qpn);
if (mlx4_cmd(dev, in_param, RES_QP, RES_OP_MAP_ICM,
MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED))
mlx4_warn(dev, "Failed to free icm of qp:%d\n", qpn);
} else
__mlx4_qp_free_icm(dev, qpn);
}
int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@ -184,70 +376,29 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
qp->qpn = qpn;
err = mlx4_table_get(dev, &qp_table->qp_table, qp->qpn);
err = mlx4_qp_alloc_icm(dev, qpn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &qp_table->auxc_table, qp->qpn);
if (err)
goto err_put_qp;
err = mlx4_table_get(dev, &qp_table->altc_table, qp->qpn);
if (err)
goto err_put_auxc;
err = mlx4_table_get(dev, &qp_table->rdmarc_table, qp->qpn);
if (err)
goto err_put_altc;
err = mlx4_table_get(dev, &qp_table->cmpt_table, qp->qpn);
if (err)
goto err_put_rdmarc;
return err;
spin_lock_irq(&qp_table->lock);
err = radix_tree_insert(&dev->qp_table_tree, qp->qpn & (dev->caps.num_qps - 1), qp);
err = radix_tree_insert(&dev->qp_table_tree, qp->qpn &
(dev->caps.num_qps - 1), qp);
spin_unlock_irq(&qp_table->lock);
if (err)
goto err_put_cmpt;
goto err_icm;
atomic_set(&qp->refcount, 1);
init_completion(&qp->free);
return 0;
err_put_cmpt:
mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
err_put_rdmarc:
mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
err_put_altc:
mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
err_put_auxc:
mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
err_put_qp:
mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
err_out:
err_icm:
mlx4_qp_free_icm(dev, qpn);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
struct mlx4_qp *mlx4_qp_lookup_lock(struct mlx4_dev *dev, u32 qpn)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
unsigned long flags;
struct mlx4_qp *qp;
spin_lock_irqsave(&qp_table->lock, flags);
qp = radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
spin_unlock_irqrestore(&qp_table->lock, flags);
return qp;
}
EXPORT_SYMBOL_GPL(mlx4_qp_lookup_lock);
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
@ -261,25 +412,18 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
wait_for_completion(&qp->free);
mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
mlx4_table_put(dev, &qp_table->altc_table, qp->qpn);
mlx4_table_put(dev, &qp_table->auxc_table, qp->qpn);
mlx4_table_put(dev, &qp_table->qp_table, qp->qpn);
mlx4_qp_free_icm(dev, qp->qpn);
}
EXPORT_SYMBOL_GPL(mlx4_qp_free);
static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
{
return mlx4_cmd(dev, 0, base_qpn,
(dev->caps.flags & MLX4_DEV_CAP_FLAG_RAW_ETY) ? 4 : 0,
MLX4_CMD_CONF_SPECIAL_QP, MLX4_CMD_TIME_CLASS_B);
return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
}
int mlx4_init_qp_table(struct mlx4_dev *dev)
@ -287,18 +431,23 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
int err;
int reserved_from_top = 0;
int reserved_from_bot;
int k;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
if (mlx4_is_slave(dev))
return 0;
/*
* We reserve 2 extra QPs per port for the special QPs. The
* block of special QPs must be aligned to a multiple of 8, so
* round up.
*
* We also reserve the MSB of the 24-bit QP number to indicate
* an XRC qp.
* that a QP is an XRC QP.
*/
dev->caps.sqp_start =
dev->phys_caps.base_sqpn =
ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
{
@ -329,34 +478,82 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
}
/* Reserve 8 real SQPs in both native and SRIOV modes.
* In addition, in SRIOV mode, reserve 8 proxy SQPs per function
* (for all PFs and VFs), and 8 corresponding tunnel QPs.
* Each proxy SQP works opposite its own tunnel QP.
*
* The QPs are arranged as follows:
* a. 8 real SQPs
* b. All the proxy SQPs (8 per function)
* c. All the tunnel QPs (8 per function)
*/
reserved_from_bot = mlx4_num_reserved_sqps(dev);
if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) {
mlx4_err(dev, "Number of reserved QPs is higher than number "
"of QPs, increase the value of log_num_qp\n");
return -EINVAL;
}
err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
(1 << 23) - 1, dev->caps.sqp_start + 8,
(1 << 23) - 1, reserved_from_bot,
reserved_from_top);
if (err)
return err;
return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start);
if (mlx4_is_mfunc(dev)) {
/* for PPF use */
dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
/* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
* since the PF does not call mlx4_slave_caps */
dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
!dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
err = -ENOMEM;
goto err_mem;
}
for (k = 0; k < dev->caps.num_ports; k++) {
dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
8 * mlx4_master_func_num(dev) + k;
dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
}
}
err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
if (err)
goto err_mem;
return 0;
err_mem:
kfree(dev->caps.qp0_tunnel);
kfree(dev->caps.qp0_proxy);
kfree(dev->caps.qp1_tunnel);
kfree(dev->caps.qp1_proxy);
dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
return err;
}
void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
{
if (mlx4_is_slave(dev))
return;
mlx4_CONF_SPECIAL_QP(dev, 0);
mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
}
int mlx4_qp_get_region(struct mlx4_dev *dev, enum mlx4_qp_region region,
int *base_qpn, int *cnt)
{
if ((region < 0) || (region >= MLX4_NUM_QP_REGION))
return -EINVAL;
*base_qpn = dev->caps.reserved_qps_base[region];
*cnt = dev->caps.reserved_qps_cnt[region];
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_qp_get_region);
int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
struct mlx4_qp_context *context)
{
@ -368,7 +565,8 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
return PTR_ERR(mailbox);
err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A);
MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
if (!err)
memcpy(context, mailbox->buf + 8, sizeof *context);

View File

@ -121,7 +121,7 @@ int mlx4_reset(struct mlx4_dev *dev)
iounmap(reset);
/* Docs say to wait one second before accessing device */
msleep(1000);
msleep(2000);
end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
do {
@ -139,11 +139,12 @@ int mlx4_reset(struct mlx4_dev *dev)
goto out;
}
/* Now restore the PCI headers */
if (pcie_cap) {
devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL,
devctl)) {
devctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Device Control register, aborting.\n");
@ -151,7 +152,7 @@ int mlx4_reset(struct mlx4_dev *dev)
}
linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL,
linkctl)) {
linkctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express "
"Link control register, aborting.\n");

File diff suppressed because it is too large Load Diff

View File

@ -38,14 +38,15 @@
#include "mlx4.h"
static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
enum mlx4_port_type *type)
int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
enum mlx4_port_type *type)
{
u64 out_param;
int err = 0;
err = mlx4_cmd_imm(dev, 0, &out_param, port, 0,
MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err) {
mlx4_err(dev, "Sense command failed for port: %d\n", port);
return err;
@ -53,7 +54,7 @@ static int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
if (out_param > 2) {
mlx4_err(dev, "Sense returned illegal value: 0x%llx\n", out_param);
return EINVAL;
return -EINVAL;
}
*type = out_param;
@ -79,20 +80,6 @@ void mlx4_do_sense_ports(struct mlx4_dev *dev,
stype[i - 1] = defaults[i - 1];
}
/*
* Adjust port configuration:
* If port 1 sensed nothing and port 2 is IB, set both as IB
* If port 2 sensed nothing and port 1 is Eth, set both as Eth
*/
if (stype[0] == MLX4_PORT_TYPE_ETH) {
for (i = 1; i < dev->caps.num_ports; i++)
stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_ETH;
}
if (stype[dev->caps.num_ports - 1] == MLX4_PORT_TYPE_IB) {
for (i = 0; i < dev->caps.num_ports - 1; i++)
stype[i] = stype[i] ? stype[i] : MLX4_PORT_TYPE_IB;
}
/*
* If sensed nothing, remain in current configuration.
*/
@ -139,18 +126,26 @@ void mlx4_start_sense(struct mlx4_dev *dev)
round_jiffies(MLX4_SENSE_RANGE));
}
void mlx4_stop_sense(struct mlx4_dev *dev)
{
mlx4_priv(dev)->sense.resched = 0;
}
int mlx4_sense_init(struct mlx4_dev *dev)
void mlx4_sense_cleanup(struct mlx4_dev *dev)
{
mlx4_stop_sense(dev);
cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll);
destroy_workqueue(mlx4_priv(dev)->sense.sense_wq);
}
int mlx4_sense_init(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_sense *sense = &priv->sense;
int port;
sense->dev = dev;
sense->sense_wq = create_singlethread_workqueue("mlx4_sense");
if (!sense->sense_wq)
@ -159,14 +154,7 @@ int mlx4_sense_init(struct mlx4_dev *dev)
for (port = 1; port <= dev->caps.num_ports; port++)
sense->do_sense_port[port] = 1;
INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port);
return 0;
}
INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port);
void mlx4_sense_cleanup(struct mlx4_dev *dev)
{
mlx4_stop_sense(dev);
cancel_delayed_work(&mlx4_priv(dev)->sense.sense_poll);
destroy_workqueue(mlx4_priv(dev)->sense.sense_wq);
return 0;
}

View File

@ -34,31 +34,11 @@
#include <linux/init.h>
#include <linux/mlx4/cmd.h>
#include <linux/mlx4/srq.h>
#include <linux/gfp.h>
#include "mlx4.h"
#include "icm.h"
struct mlx4_srq_context {
__be32 state_logsize_srqn;
u8 logstride;
u8 reserved1;
__be16 xrc_domain;
__be32 pg_offset_cqn;
u32 reserved2;
u8 log_page_size;
u8 reserved3[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
__be32 pd;
__be16 limit_watermark;
__be16 wqe_cnt;
u16 reserved4;
__be16 wqe_counter;
u32 reserved5;
__be64 db_rec_addr;
};
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
@ -66,8 +46,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
spin_lock(&srq_table->lock);
srq = radix_tree_lookup(&dev->srq_table_tree,
srqn & (dev->caps.num_srqs - 1));
srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
if (srq)
atomic_inc(&srq->refcount);
@ -87,8 +66,9 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int srq_num)
{
return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ,
MLX4_CMD_TIME_CLASS_A);
return mlx4_cmd(dev, mailbox->dma, srq_num, 0,
MLX4_CMD_SW2HW_SRQ, MLX4_CMD_TIME_CLASS_A,
MLX4_CMD_WRAPPED);
}
static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@ -96,20 +76,89 @@ static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
{
return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
MLX4_CMD_TIME_CLASS_A);
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
{
return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
MLX4_CMD_TIME_CLASS_B);
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
int srq_num)
{
return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ,
MLX4_CMD_TIME_CLASS_A);
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
}
int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
int err;
*srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
if (*srqn == -1)
return -ENOMEM;
err = mlx4_table_get(dev, &srq_table->table, *srqn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
if (err)
goto err_put;
return 0;
err_put:
mlx4_table_put(dev, &srq_table->table, *srqn);
err_out:
mlx4_bitmap_free(&srq_table->bitmap, *srqn);
return err;
}
static int mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
{
u64 out_param;
int err;
if (mlx4_is_mfunc(dev)) {
err = mlx4_cmd_imm(dev, 0, &out_param, RES_SRQ,
RES_OP_RESERVE_AND_MAP,
MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (!err)
*srqn = get_param_l(&out_param);
return err;
}
return __mlx4_srq_alloc_icm(dev, srqn);
}
void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
mlx4_table_put(dev, &srq_table->cmpt_table, srqn);
mlx4_table_put(dev, &srq_table->table, srqn);
mlx4_bitmap_free(&srq_table->bitmap, srqn);
}
static void mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn)
{
u64 in_param = 0;
if (mlx4_is_mfunc(dev)) {
set_param_l(&in_param, srqn);
if (mlx4_cmd(dev, in_param, RES_SRQ, RES_OP_RESERVE_AND_MAP,
MLX4_CMD_FREE_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
mlx4_warn(dev, "Failed freeing cq:%d\n", srqn);
return;
}
__mlx4_srq_free_icm(dev, srqn);
}
int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
@ -121,23 +170,15 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
u64 mtt_addr;
int err;
srq->srqn = mlx4_bitmap_alloc(&srq_table->bitmap);
if (srq->srqn == -1)
return -ENOMEM;
err = mlx4_table_get(dev, &srq_table->table, srq->srqn);
err = mlx4_srq_alloc_icm(dev, &srq->srqn);
if (err)
goto err_out;
err = mlx4_table_get(dev, &srq_table->cmpt_table, srq->srqn);
if (err)
goto err_put;
return err;
spin_lock_irq(&srq_table->lock);
err = radix_tree_insert(&dev->srq_table_tree, srq->srqn, srq);
err = radix_tree_insert(&srq_table->tree, srq->srqn, srq);
spin_unlock_irq(&srq_table->lock);
if (err)
goto err_cmpt_put;
goto err_icm;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
@ -151,7 +192,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
srq->srqn);
srq_context->logstride = srq->wqe_shift - 4;
srq_context->xrc_domain = cpu_to_be16(xrcd);
srq_context->xrcd = cpu_to_be16(xrcd);
srq_context->pg_offset_cqn = cpu_to_be32(cqn & 0xffffff);
srq_context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
@ -173,52 +214,33 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
err_radix:
spin_lock_irq(&srq_table->lock);
radix_tree_delete(&dev->srq_table_tree, srq->srqn);
radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
err_cmpt_put:
mlx4_table_put(dev, &srq_table->cmpt_table, srq->srqn);
err_put:
mlx4_table_put(dev, &srq_table->table, srq->srqn);
err_out:
mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
err_icm:
mlx4_srq_free_icm(dev, srq->srqn);
return err;
}
EXPORT_SYMBOL_GPL(mlx4_srq_alloc);
void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq)
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
int err;
err = mlx4_HW2SW_SRQ(dev, NULL, srq->srqn);
if (err)
mlx4_warn(dev, "HW2SW_SRQ failed (%d) for SRQN %06x\n", err, srq->srqn);
}
EXPORT_SYMBOL_GPL(mlx4_srq_invalidate);
void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
spin_lock_irq(&srq_table->lock);
radix_tree_delete(&dev->srq_table_tree, srq->srqn);
radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
}
EXPORT_SYMBOL_GPL(mlx4_srq_remove);
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
{
struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
if (atomic_dec_and_test(&srq->refcount))
complete(&srq->free);
wait_for_completion(&srq->free);
mlx4_table_put(dev, &srq_table->table, srq->srqn);
mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
mlx4_srq_free_icm(dev, srq->srqn);
}
EXPORT_SYMBOL_GPL(mlx4_srq_free);
@ -257,7 +279,9 @@ int mlx4_init_srq_table(struct mlx4_dev *dev)
int err;
spin_lock_init(&srq_table->lock);
INIT_RADIX_TREE(&dev->srq_table_tree, GFP_ATOMIC);
INIT_RADIX_TREE(&srq_table->tree, GFP_ATOMIC);
if (mlx4_is_slave(dev))
return 0;
err = mlx4_bitmap_init(&srq_table->bitmap, dev->caps.num_srqs,
dev->caps.num_srqs - 1, dev->caps.reserved_srqs, 0);
@ -269,5 +293,7 @@ int mlx4_init_srq_table(struct mlx4_dev *dev)
void mlx4_cleanup_srq_table(struct mlx4_dev *dev)
{
if (mlx4_is_slave(dev))
return;
mlx4_bitmap_cleanup(&mlx4_priv(dev)->srq_table.bitmap);
}

View File

@ -0,0 +1,325 @@
/*
* Copyright (c) 2010 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/sched.h>
#include <linux/mutex.h>
#include <asm/atomic.h>
#include "mlx4.h"
#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
/* Each CPU is put into a group. In most cases, the group number is
* equal to the CPU number of one of the CPUs in the group. The
* exception is group NR_CPUS which is the default group. This is
* protected by sys_tune_startup_mutex. */
DEFINE_PER_CPU(int, idle_cpu_group) = NR_CPUS;
/* For each group, a count of the number of CPUs in the group which
* are known to be busy. A busy CPU might be running the busy loop
* below or general kernel code. The count is decremented on entry to
* the old pm_idle handler and incremented on exit. The aim is to
* avoid the count going to zero or negative. This situation can
* occur temporarily during module unload or CPU hot-plug but
* normality will be restored when the affected CPUs next exit the
* idle loop. */
static atomic_t busy_cpu_count[NR_CPUS+1];
/* A workqueue item to be executed to cause the CPU to exit from the
* idle loop. */
DEFINE_PER_CPU(struct work_struct, sys_tune_cpu_work);
#define sys_tune_set_state(CPU,STATE) \
do { } while(0)
/* A mutex to protect most of the module datastructures. */
static DEFINE_MUTEX(sys_tune_startup_mutex);
/* The old pm_idle handler. */
static void (*old_pm_idle)(void) = NULL;
static void sys_tune_pm_idle(void)
{
atomic_t *busy_cpus_ptr;
int busy_cpus;
int cpu = smp_processor_id();
busy_cpus_ptr = &(busy_cpu_count[per_cpu(idle_cpu_group, cpu)]);
sys_tune_set_state(cpu, 2);
local_irq_enable();
while (!need_resched()) {
busy_cpus = atomic_read(busy_cpus_ptr);
/* If other CPUs in this group are busy then let this
* CPU go idle. We mustn't let the number of busy
* CPUs drop below 1. */
if ( busy_cpus > 1 &&
old_pm_idle != NULL &&
( atomic_cmpxchg(busy_cpus_ptr, busy_cpus,
busy_cpus-1) == busy_cpus ) ) {
local_irq_disable();
sys_tune_set_state(cpu, 3);
/* This check might not be necessary, but it
* seems safest to include it because there
* might be a kernel version which requires
* it. */
if (need_resched())
local_irq_enable();
else
old_pm_idle();
/* This CPU is busy again. */
sys_tune_set_state(cpu, 1);
atomic_add(1, busy_cpus_ptr);
return;
}
cpu_relax();
}
sys_tune_set_state(cpu, 0);
}
void sys_tune_work_func(struct work_struct *work)
{
/* Do nothing. Since this function is running in process
* context, the idle thread isn't running on this CPU. */
}
#ifdef CONFIG_SMP
static void sys_tune_smp_call(void *info)
{
schedule_work(&get_cpu_var(sys_tune_cpu_work));
put_cpu_var(sys_tune_cpu_work);
}
#endif
#ifdef CONFIG_SMP
static void sys_tune_refresh(void)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
on_each_cpu(&sys_tune_smp_call, NULL, 0, 1);
#else
on_each_cpu(&sys_tune_smp_call, NULL, 1);
#endif
}
#else
static void sys_tune_refresh(void)
{
/* The current thread is executing on the one and only CPU so
* the idle thread isn't running. */
}
#endif
static int sys_tune_cpu_group(int cpu)
{
#ifdef CONFIG_SMP
const cpumask_t *mask;
int other_cpu;
int group;
#if defined(topology_thread_cpumask) && defined(ST_HAVE_EXPORTED_CPU_SIBLING_MAP)
/* Keep one hyperthread busy per core. */
mask = topology_thread_cpumask(cpu);
#else
return cpu;
#endif
for_each_cpu_mask(cpu, *(mask)) {
group = per_cpu(idle_cpu_group, other_cpu);
if (group != NR_CPUS)
return group;
}
#endif
return cpu;
}
static void sys_tune_add_cpu(int cpu)
{
int group;
/* Do nothing if this CPU has already been added. */
if (per_cpu(idle_cpu_group, cpu) != NR_CPUS)
return;
group = sys_tune_cpu_group(cpu);
per_cpu(idle_cpu_group, cpu) = group;
atomic_inc(&(busy_cpu_count[group]));
}
static void sys_tune_del_cpu(int cpu)
{
int group;
if (per_cpu(idle_cpu_group, cpu) == NR_CPUS)
return;
group = per_cpu(idle_cpu_group, cpu);
/* If the CPU was busy, this can cause the count to drop to
* zero. To rectify this, we need to cause one of the other
* CPUs in the group to exit the idle loop. If the CPU was
* not busy then this causes the contribution for this CPU to
* go to -1 which can cause the overall count to drop to zero
* or go negative. To rectify this situation we need to cause
* this CPU to exit the idle loop. */
atomic_dec(&(busy_cpu_count[group]));
per_cpu(idle_cpu_group, cpu) = NR_CPUS;
}
static int sys_tune_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int cpu = (long)hcpu;
switch(action) {
#ifdef CPU_ONLINE_FROZEN
case CPU_ONLINE_FROZEN:
#endif
case CPU_ONLINE:
mutex_lock(&sys_tune_startup_mutex);
sys_tune_add_cpu(cpu);
mutex_unlock(&sys_tune_startup_mutex);
/* The CPU might have already entered the idle loop in
* the wrong group. Make sure it exits the idle loop
* so that it picks up the correct group. */
sys_tune_refresh();
break;
#ifdef CPU_DEAD_FROZEN
case CPU_DEAD_FROZEN:
#endif
case CPU_DEAD:
mutex_lock(&sys_tune_startup_mutex);
sys_tune_del_cpu(cpu);
mutex_unlock(&sys_tune_startup_mutex);
/* The deleted CPU may have been the only busy CPU in
* the group. Make sure one of the other CPUs in the
* group exits the idle loop. */
sys_tune_refresh();
break;
}
return NOTIFY_OK;
}
static struct notifier_block sys_tune_cpu_nb = {
.notifier_call = sys_tune_cpu_notify,
};
static void sys_tune_ensure_init(void)
{
BUG_ON (old_pm_idle != NULL);
/* Atomically update pm_idle to &sys_tune_pm_idle. The old value
* is stored in old_pm_idle before installing the new
* handler. */
do {
old_pm_idle = pm_idle;
} while (cmpxchg(&pm_idle, old_pm_idle, &sys_tune_pm_idle) !=
old_pm_idle);
}
#endif
void sys_tune_fini(void)
{
#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
void (*old)(void);
int cpu;
unregister_cpu_notifier(&sys_tune_cpu_nb);
mutex_lock(&sys_tune_startup_mutex);
old = cmpxchg(&pm_idle, &sys_tune_pm_idle, old_pm_idle);
for_each_online_cpu(cpu)
sys_tune_del_cpu(cpu);
mutex_unlock(&sys_tune_startup_mutex);
/* Our handler may still be executing on other CPUs.
* Schedule this thread on all CPUs to make sure all
* idle threads get interrupted. */
sys_tune_refresh();
/* Make sure the work item has finished executing on all CPUs.
* This in turn ensures that all idle threads have been
* interrupted. */
flush_scheduled_work();
#endif /* CONFIG_X86 */
}
void sys_tune_init(void)
{
#if defined(CONFIG_X86) && defined(CONFIG_APM_MODULE)
int cpu;
for_each_possible_cpu(cpu) {
INIT_WORK(&per_cpu(sys_tune_cpu_work, cpu),
sys_tune_work_func);
}
/* Start by registering the handler to ensure we don't miss
* any updates. */
register_cpu_notifier(&sys_tune_cpu_nb);
mutex_lock(&sys_tune_startup_mutex);
for_each_online_cpu(cpu)
sys_tune_add_cpu(cpu);
sys_tune_ensure_init();
mutex_unlock(&sys_tune_startup_mutex);
/* Ensure our idle handler starts to run. */
sys_tune_refresh();
#endif
}

View File

@ -45,6 +45,7 @@ typedef struct {
#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
#define atomic_inc_and_test(v) (atomic_add_return(1, (v)) == 0)
#define atomic_dec_return(v) atomic_sub_return(1, (v))
static inline int
atomic_add_return(int i, atomic_t *v)
@ -82,4 +83,25 @@ atomic_dec(atomic_t *v)
return atomic_fetchadd_int(&v->counter, -1) - 1;
}
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int c, old;
c = atomic_read(v);
for (;;) {
if (unlikely(c == (u)))
break;
// old = atomic_cmpxchg((v), c, c + (a)); /*Linux*/
old = atomic_cmpset_int(&v->counter, c, c + (a));
if (likely(old == c))
break;
c = old;
}
return c != (u);
}
#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
#endif /* _ASM_ATOMIC_H_ */

View File

@ -30,6 +30,7 @@
#include <sys/types.h>
#include <sys/endian.h>
#include <asm/types.h>
#if BYTE_ORDER == LITTLE_ENDIAN
#define __LITTLE_ENDIAN

View File

@ -0,0 +1,53 @@
#ifndef _COMPAT_LINUX_ATOMIC_H
#define _COMPAT_LINUX_ATOMIC_H 1
/*
#include <linux/version.h>
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36))
#include_next <linux/atomic.h>
#else
*/
#include <asm/atomic.h>
/* Shahar Klein: atomic_inc_not_zero_hint do we need it? */
#if 0
/**
* atomic_inc_not_zero_hint - increment if not null
* @v: pointer of type atomic_t
* @hint: probable value of the atomic before the increment
*
* This version of atomic_inc_not_zero() gives a hint of probable
* value of the atomic. This helps processor to not read the memory
* before doing the atomic read/modify/write cycle, lowering
* number of bus transactions on some arches.
*
* Returns: 0 if increment was not done, 1 otherwise.
*/
#ifndef atomic_inc_not_zero_hint
static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
{
int val, c = hint;
/* sanity test, should be removed by compiler if hint is a constant */
if (!hint)
return atomic_inc_not_zero(v);
do {
val = atomic_cmpxchg(v, c, c + 1);
if (val == c)
return 1;
c = val;
} while (c);
return 0;
}
#endif
#endif
//#endif /* (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36)) */
#endif /* _COMPAT_LINUX_ATOMIC_H */

View File

@ -35,6 +35,7 @@
#endif
#define BIT_MASK(n) (~0UL >> (BITS_PER_LONG - (n)))
#define BITS_TO_LONGS(n) howmany((n), BITS_PER_LONG)
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
static inline int
__ffs(int mask)
@ -63,6 +64,16 @@ __flsl(long mask)
#define ffz(mask) __ffs(~(mask))
static inline int get_count_order(unsigned int count)
{
int order;
order = fls(count) - 1;
if (count & (count - 1))
order++;
return order;
}
static inline unsigned long
find_first_bit(unsigned long *addr, unsigned long size)
{
@ -314,4 +325,159 @@ test_and_set_bit(long bit, long *var)
return !!(val & bit);
}
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
#define BITMAP_LAST_WORD_MASK(nbits) \
( \
((nbits) % BITS_PER_LONG) ? \
(1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
)
static inline void
bitmap_set(unsigned long *map, int start, int nr)
{
unsigned long *p = map + BIT_WORD(start);
const int size = start + nr;
int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
while (nr - bits_to_set >= 0) {
*p |= mask_to_set;
nr -= bits_to_set;
bits_to_set = BITS_PER_LONG;
mask_to_set = ~0UL;
p++;
}
if (nr) {
mask_to_set &= BITMAP_LAST_WORD_MASK(size);
*p |= mask_to_set;
}
}
static inline void
bitmap_clear(unsigned long *map, int start, int nr)
{
unsigned long *p = map + BIT_WORD(start);
const int size = start + nr;
int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
while (nr - bits_to_clear >= 0) {
*p &= ~mask_to_clear;
nr -= bits_to_clear;
bits_to_clear = BITS_PER_LONG;
mask_to_clear = ~0UL;
p++;
}
if (nr) {
mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
*p &= ~mask_to_clear;
}
}
enum {
REG_OP_ISFREE, /* true if region is all zero bits */
REG_OP_ALLOC, /* set all bits in region */
REG_OP_RELEASE, /* clear all bits in region */
};
static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
{
int nbits_reg; /* number of bits in region */
int index; /* index first long of region in bitmap */
int offset; /* bit offset region in bitmap[index] */
int nlongs_reg; /* num longs spanned by region in bitmap */
int nbitsinlong; /* num bits of region in each spanned long */
unsigned long mask; /* bitmask for one long of region */
int i; /* scans bitmap by longs */
int ret = 0; /* return value */
/*
* Either nlongs_reg == 1 (for small orders that fit in one long)
* or (offset == 0 && mask == ~0UL) (for larger multiword orders.)
*/
nbits_reg = 1 << order;
index = pos / BITS_PER_LONG;
offset = pos - (index * BITS_PER_LONG);
nlongs_reg = BITS_TO_LONGS(nbits_reg);
nbitsinlong = min(nbits_reg, BITS_PER_LONG);
/*
* Can't do "mask = (1UL << nbitsinlong) - 1", as that
* overflows if nbitsinlong == BITS_PER_LONG.
*/
mask = (1UL << (nbitsinlong - 1));
mask += mask - 1;
mask <<= offset;
switch (reg_op) {
case REG_OP_ISFREE:
for (i = 0; i < nlongs_reg; i++) {
if (bitmap[index + i] & mask)
goto done;
}
ret = 1; /* all bits in region free (zero) */
break;
case REG_OP_ALLOC:
for (i = 0; i < nlongs_reg; i++)
bitmap[index + i] |= mask;
break;
case REG_OP_RELEASE:
for (i = 0; i < nlongs_reg; i++)
bitmap[index + i] &= ~mask;
break;
}
done:
return ret;
}
/**
* bitmap_find_free_region - find a contiguous aligned mem region
* @bitmap: array of unsigned longs corresponding to the bitmap
* @bits: number of bits in the bitmap
* @order: region size (log base 2 of number of bits) to find
*
* Find a region of free (zero) bits in a @bitmap of @bits bits and
* allocate them (set them to one). Only consider regions of length
* a power (@order) of two, aligned to that power of two, which
* makes the search algorithm much faster.
*
* Return the bit offset in bitmap of the allocated region,
* or -errno on failure.
*/
static inline int
bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
{
int pos, end; /* scans bitmap by regions of size order */
for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
continue;
__reg_op(bitmap, pos, order, REG_OP_ALLOC);
return pos;
}
return -ENOMEM;
}
/**
* bitmap_release_region - release allocated bitmap region
* @bitmap: array of unsigned longs corresponding to the bitmap
* @pos: beginning of bit region to release
* @order: region size (log base 2 of number of bits) to release
*
* This is the complement to __bitmap_find_free_region() and releases
* the found region (by clearing it in the bitmap).
*
* No return value.
*/
static inline void
bitmap_release_region(unsigned long *bitmap, int pos, int order)
{
__reg_op(bitmap, pos, order, REG_OP_RELEASE);
}
#endif /* _LINUX_BITOPS_H_ */

View File

@ -0,0 +1,17 @@
/* linux/include/linux/clocksource.h
*
* MLX4_CORE_PORT
*
* This file contains the structure definitions for clocksources.
*
* If you are not a clocksource, or timekeeping code, you should
* not be including this file!
*/
#ifndef _LINUX_CLOCKSOURCE_H
#define _LINUX_CLOCKSOURCE_H
/* clocksource cycle base type */
typedef u64 cycle_t;
#endif /* _LINUX_CLOCKSOURCE_H */

View File

@ -29,5 +29,8 @@
#ifndef _LINUX_COMPAT_H_
#define _LINUX_COMPAT_H_
#define is_multicast_ether_addr(x) 0
#define is_broadcast_ether_addr(x) 0
#endif /* _LINUX_COMPAT_H_ */

View File

@ -385,4 +385,10 @@ class_remove_file(struct class *class, const struct class_attribute *attr)
sysfs_remove_file(&class->kobj, &attr->attr);
}
static inline int dev_to_node(struct device *dev)
{
return -1;
}
#endif /* _LINUX_DEVICE_H_ */

View File

@ -245,6 +245,13 @@ dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
return (0);
}
static inline unsigned int dma_set_max_seg_size(struct device *dev,
unsigned int size)
{
return (0);
}
#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL)
#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL)
#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL)

View File

@ -121,4 +121,8 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
return (virt_to_page(page));
}
#define alloc_pages_node(node, mask, order) alloc_pages(mask, order)
#define kmalloc_node(chunk, mask, node) kmalloc(chunk, mask)
#endif /* _LINUX_GFP_H_ */

View File

@ -40,6 +40,10 @@
#define MAX_ID_MASK (MAX_ID_BIT - 1)
#define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS
#define MAX_IDR_SHIFT (sizeof(int)*8 - 1)
#define MAX_IDR_BIT (1U << MAX_IDR_SHIFT)
#define MAX_IDR_MASK (MAX_IDR_BIT - 1)
struct idr_layer {
unsigned long bitmap;
struct idr_layer *ary[IDR_SIZE];

View File

@ -34,4 +34,9 @@
#define ETH_P_8021Q ETHERTYPE_VLAN
/*
* defined Ethernet Protocol ID's.
*/
#define ETH_P_IP 0x0800 /* Internet Protocol packet */
#endif /* _LINUX_IF_ETHER_H_ */

View File

@ -29,6 +29,8 @@
#ifndef _LINUX_IN6_H_
#define _LINUX_IN6_H_
#ifndef KLD_MODULE
#include "opt_inet6.h"
#endif
#endif /* _LINUX_IN6_H_ */

View File

@ -47,6 +47,7 @@
#include <linux/log2.h>
#include <asm/byteorder.h>
#define KERN_CONT ""
#define KERN_EMERG "<0>"
#define KERN_ALERT "<1>"
#define KERN_CRIT "<2>"
@ -68,6 +69,60 @@
#define pr_debug(fmt, ...) printk(KERN_DEBUG # fmt, ##__VA_ARGS__)
#define udelay(t) DELAY(t)
#ifndef pr_fmt
#define pr_fmt(fmt) fmt
#endif
/*
* Print a one-time message (analogous to WARN_ONCE() et al):
*/
#define printk_once(x...) ({ \
static bool __print_once; \
\
if (!__print_once) { \
__print_once = true; \
printk(x); \
} \
})
#define pr_emerg(fmt, ...) \
printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
#define pr_alert(fmt, ...) \
printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
#define pr_crit(fmt, ...) \
printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...) \
printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
#define pr_warning(fmt, ...) \
printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
#define pr_warn pr_warning
#define pr_notice(fmt, ...) \
printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info(fmt, ...) \
printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
#define pr_cont(fmt, ...) \
printk(KERN_CONT fmt, ##__VA_ARGS__)
/* pr_devel() should produce zero code unless DEBUG is defined */
#ifdef DEBUG
#define pr_devel(fmt, ...) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_devel(fmt, ...) \
({ if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); 0; })
#endif
#ifndef WARN
#define WARN(condition, format...) ({ \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
pr_warning(format); \
unlikely(__ret_warn_on); \
})
#endif
#define container_of(ptr, type, member) \
({ \
__typeof(((type *)0)->member) *_p = (ptr); \
@ -77,12 +132,27 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define simple_strtoul strtoul
#define simple_strtol strtol
#define min(x, y) (x < y ? x : y)
#define max(x, y) (x > y ? x : y)
#define min_t(type, _x, _y) (type)(_x) < (type)(_y) ? (type)(_x) : (_y)
#define max_t(type, _x, _y) (type)(_x) > (type)(_y) ? (type)(_x) : (_y)
/*
* This looks more complex than it should be. But we need to
* get the type for the ~ right in round_down (it needs to be
* as wide as the result!), and we want to evaluate the macro
* arguments just once each.
*/
#define __round_mask(x, y) ((__typeof__(x))((y)-1))
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
#define round_down(x, y) ((x) & ~__round_mask(x, y))
#define num_possible_cpus() mp_ncpus
typedef struct pm_message {
int event;
} pm_message_t;
#endif /* _LINUX_KERNEL_H_ */

View File

@ -267,6 +267,8 @@ linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
if ((error = devfs_get_cdevpriv((void **)&filp)) != 0)
return (error);
filp->f_flags = file->f_flag;
devfs_clear_cdevpriv();
return (0);
}

View File

@ -111,6 +111,9 @@ list_del_init(struct list_head *entry)
#define list_entry(ptr, type, field) container_of(ptr, type, field)
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
#define list_for_each(p, head) \
for (p = (head)->next; p != (head); p = p->next)

View File

@ -51,10 +51,119 @@ rounddown_pow_of_two(unsigned long x)
return (1UL << (flsl(x) - 1));
}
static inline unsigned long
ilog2(unsigned long x)
/*
* deal with unrepresentable constant logarithms
*/
extern __attribute__((const, noreturn))
int ____ilog2_NaN(void);
/*
* non-constant log of base 2 calculators
* - the arch may override these in asm/bitops.h if they can be implemented
* more efficiently than using fls() and fls64()
* - the arch is not required to handle n==0 if implementing the fallback
*/
#ifndef CONFIG_ARCH_HAS_ILOG2_U32
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
return (flsl(x) - 1);
return flsl(n) - 1;
}
#endif
#ifndef CONFIG_ARCH_HAS_ILOG2_U64
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
return flsl(n) - 1;
}
#endif
/**
* ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
* @n - parameter
*
* constant-capable log of base 2 calculation
* - this can be used to initialise global variables from constant data, hence
* the massive ternary operator construction
*
* selects the appropriately-sized optimised version depending on sizeof(n)
*/
#define ilog2(n) \
( \
__builtin_constant_p(n) ? ( \
(n) < 1 ? ____ilog2_NaN() : \
(n) & (1ULL << 63) ? 63 : \
(n) & (1ULL << 62) ? 62 : \
(n) & (1ULL << 61) ? 61 : \
(n) & (1ULL << 60) ? 60 : \
(n) & (1ULL << 59) ? 59 : \
(n) & (1ULL << 58) ? 58 : \
(n) & (1ULL << 57) ? 57 : \
(n) & (1ULL << 56) ? 56 : \
(n) & (1ULL << 55) ? 55 : \
(n) & (1ULL << 54) ? 54 : \
(n) & (1ULL << 53) ? 53 : \
(n) & (1ULL << 52) ? 52 : \
(n) & (1ULL << 51) ? 51 : \
(n) & (1ULL << 50) ? 50 : \
(n) & (1ULL << 49) ? 49 : \
(n) & (1ULL << 48) ? 48 : \
(n) & (1ULL << 47) ? 47 : \
(n) & (1ULL << 46) ? 46 : \
(n) & (1ULL << 45) ? 45 : \
(n) & (1ULL << 44) ? 44 : \
(n) & (1ULL << 43) ? 43 : \
(n) & (1ULL << 42) ? 42 : \
(n) & (1ULL << 41) ? 41 : \
(n) & (1ULL << 40) ? 40 : \
(n) & (1ULL << 39) ? 39 : \
(n) & (1ULL << 38) ? 38 : \
(n) & (1ULL << 37) ? 37 : \
(n) & (1ULL << 36) ? 36 : \
(n) & (1ULL << 35) ? 35 : \
(n) & (1ULL << 34) ? 34 : \
(n) & (1ULL << 33) ? 33 : \
(n) & (1ULL << 32) ? 32 : \
(n) & (1ULL << 31) ? 31 : \
(n) & (1ULL << 30) ? 30 : \
(n) & (1ULL << 29) ? 29 : \
(n) & (1ULL << 28) ? 28 : \
(n) & (1ULL << 27) ? 27 : \
(n) & (1ULL << 26) ? 26 : \
(n) & (1ULL << 25) ? 25 : \
(n) & (1ULL << 24) ? 24 : \
(n) & (1ULL << 23) ? 23 : \
(n) & (1ULL << 22) ? 22 : \
(n) & (1ULL << 21) ? 21 : \
(n) & (1ULL << 20) ? 20 : \
(n) & (1ULL << 19) ? 19 : \
(n) & (1ULL << 18) ? 18 : \
(n) & (1ULL << 17) ? 17 : \
(n) & (1ULL << 16) ? 16 : \
(n) & (1ULL << 15) ? 15 : \
(n) & (1ULL << 14) ? 14 : \
(n) & (1ULL << 13) ? 13 : \
(n) & (1ULL << 12) ? 12 : \
(n) & (1ULL << 11) ? 11 : \
(n) & (1ULL << 10) ? 10 : \
(n) & (1ULL << 9) ? 9 : \
(n) & (1ULL << 8) ? 8 : \
(n) & (1ULL << 7) ? 7 : \
(n) & (1ULL << 6) ? 6 : \
(n) & (1ULL << 5) ? 5 : \
(n) & (1ULL << 4) ? 4 : \
(n) & (1ULL << 3) ? 3 : \
(n) & (1ULL << 2) ? 2 : \
(n) & (1ULL << 1) ? 1 : \
(n) & (1ULL << 0) ? 0 : \
____ilog2_NaN() \
) : \
(sizeof(n) <= 4) ? \
__ilog2_u32(n) : \
__ilog2_u64(n) \
)
#endif /* _LINUX_LOG2_H_ */

View File

@ -59,12 +59,16 @@ enum {
MLX4_CMD_HW_HEALTH_CHECK = 0x50,
MLX4_CMD_SET_PORT = 0xc,
MLX4_CMD_SET_NODE = 0x5a,
MLX4_CMD_QUERY_FUNC = 0x56,
MLX4_CMD_ACCESS_DDR = 0x2e,
MLX4_CMD_MAP_ICM = 0xffa,
MLX4_CMD_UNMAP_ICM = 0xff9,
MLX4_CMD_MAP_ICM_AUX = 0xffc,
MLX4_CMD_UNMAP_ICM_AUX = 0xffb,
MLX4_CMD_SET_ICM_SIZE = 0xffd,
/*master notify fw on finish for slave's flr*/
MLX4_CMD_INFORM_FLR_DONE = 0x5b,
MLX4_CMD_GET_OP_REQ = 0x59,
/* TPT commands */
MLX4_CMD_SW2HW_MPT = 0xd,
@ -119,6 +123,26 @@ enum {
/* miscellaneous commands */
MLX4_CMD_DIAG_RPRT = 0x30,
MLX4_CMD_NOP = 0x31,
MLX4_CMD_ACCESS_MEM = 0x2e,
MLX4_CMD_SET_VEP = 0x52,
/* Ethernet specific commands */
MLX4_CMD_SET_VLAN_FLTR = 0x47,
MLX4_CMD_SET_MCAST_FLTR = 0x48,
MLX4_CMD_DUMP_ETH_STATS = 0x49,
/* Communication channel commands */
MLX4_CMD_ARM_COMM_CHANNEL = 0x57,
MLX4_CMD_GEN_EQE = 0x58,
/* virtual commands */
MLX4_CMD_ALLOC_RES = 0xf00,
MLX4_CMD_FREE_RES = 0xf01,
MLX4_CMD_MCAST_ATTACH = 0xf05,
MLX4_CMD_UCAST_ATTACH = 0xf06,
MLX4_CMD_PROMISC = 0xf08,
MLX4_CMD_QUERY_FUNC_CAP = 0xf0a,
MLX4_CMD_QP_ATTACH = 0xf0b,
/* debug commands */
MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
@ -127,16 +151,26 @@ enum {
/* statistics commands */
MLX4_CMD_QUERY_IF_STAT = 0X54,
MLX4_CMD_SET_IF_STAT = 0X55,
/* set port opcode modifiers */
MLX4_SET_PORT_PRIO2TC = 0x8,
MLX4_SET_PORT_SCHEDULER = 0x9,
/* register/delete flow steering network rules */
MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
MLX4_QP_FLOW_STEERING_DETACH = 0x66,
MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64,
};
enum {
MLX4_CMD_TIME_CLASS_A = 10000,
MLX4_CMD_TIME_CLASS_B = 10000,
MLX4_CMD_TIME_CLASS_C = 10000,
MLX4_CMD_TIME_CLASS_A = 60000,
MLX4_CMD_TIME_CLASS_B = 60000,
MLX4_CMD_TIME_CLASS_C = 60000,
};
enum {
MLX4_MAILBOX_SIZE = 4096
MLX4_MAILBOX_SIZE = 4096,
MLX4_ACCESS_MEM_ALIGN = 256,
};
enum {
@ -149,6 +183,11 @@ enum {
MLX4_SET_PORT_GID_TABLE = 0x5,
};
enum {
MLX4_CMD_WRAPPED,
MLX4_CMD_NATIVE
};
struct mlx4_dev;
struct mlx4_cmd_mailbox {
@ -158,23 +197,24 @@ struct mlx4_cmd_mailbox {
int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
int out_is_imm, u32 in_modifier, u8 op_modifier,
u16 op, unsigned long timeout);
u16 op, unsigned long timeout, int native);
/* Invoke a command with no output parameter */
static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier,
u8 op_modifier, u16 op, unsigned long timeout)
u8 op_modifier, u16 op, unsigned long timeout,
int native)
{
return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier,
op_modifier, op, timeout);
op_modifier, op, timeout, native);
}
/* Invoke a command with an output mailbox */
static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param,
u32 in_modifier, u8 op_modifier, u16 op,
unsigned long timeout)
unsigned long timeout, int native)
{
return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier,
op_modifier, op, timeout);
op_modifier, op, timeout, native);
}
/*
@ -184,13 +224,21 @@ static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param
*/
static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
u32 in_modifier, u8 op_modifier, u16 op,
unsigned long timeout)
unsigned long timeout, int native)
{
return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier,
op_modifier, op, timeout);
op_modifier, op, timeout, native);
}
struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox);
u32 mlx4_comm_get_version(void);
int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac);
int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting);
#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
#endif /* MLX4_CMD_H */

View File

@ -64,6 +64,22 @@ struct mlx4_err_cqe {
u8 owner_sr_opcode;
};
struct mlx4_ts_cqe {
__be32 vlan_my_qpn;
__be32 immed_rss_invalid;
__be32 g_mlpath_rqpn;
__be32 timestamp_hi;
__be16 status;
u8 ipv6_ext_mask;
u8 badfcs_enc;
__be32 byte_cnt;
__be16 wqe_index;
__be16 checksum;
u8 reserved;
__be16 timestamp_lo;
u8 owner_sr_opcode;
} __packed;
enum {
MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29,
MLX4_CQE_QPN_MASK = 0xffffff,
@ -146,5 +162,5 @@ int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
u16 count, u16 period);
int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
int entries, struct mlx4_mtt *mtt);
int mlx4_cq_ignore_overrun(struct mlx4_dev *dev, struct mlx4_cq *cq);
#endif /* MLX4_CQ_H */

View File

@ -36,46 +36,175 @@
#include <linux/pci.h>
#include <linux/completion.h>
#include <linux/radix-tree.h>
//#include <linux/cpu_rmap.h> /* XXX SK Probably not needed in freeBSD XXX */
#include <asm/atomic.h>
#include <linux/mlx4/driver.h>
#include <linux/clocksource.h> /* XXX SK ported to freeBSD */
#define MAX_MSIX_P_PORT 17
#define MAX_MSIX 64
#define MSIX_LEGACY_SZ 4
#define MIN_MSIX_P_PORT 5
#define MLX4_ROCE_MAX_GIDS 128
#define MLX4_ROCE_PF_GIDS 16
#define MLX4_NUM_UP 8
#define MLX4_NUM_TC 8
#define MLX4_MAX_100M_UNITS_VAL 255 /*
* work around: can't set values
* greater then this value when
* using 100 Mbps units.
*/
#define MLX4_RATELIMIT_100M_UNITS 3 /* 100 Mbps */
#define MLX4_RATELIMIT_1G_UNITS 4 /* 1 Gbps */
#define MLX4_RATELIMIT_DEFAULT 0x00ff
#define MLX4_LEAST_ATTACHED_VECTOR 0xffffffff
enum {
MLX4_FLAG_MSI_X = 1 << 0,
MLX4_FLAG_OLD_PORT_CMDS = 1 << 1,
MLX4_FLAG_MASTER = 1 << 2,
MLX4_FLAG_SLAVE = 1 << 3,
MLX4_FLAG_SRIOV = 1 << 4,
};
enum {
MLX4_MAX_PORTS = 2
MLX4_PORT_CAP_IS_SM = 1 << 1,
MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19,
};
enum {
MLX4_MAX_PORTS = 2,
MLX4_MAX_PORT_PKEYS = 128
};
/* base qkey for use in sriov tunnel-qp/proxy-qp communication.
* These qkeys must not be allowed for general use. This is a 64k range,
* and to test for violation, we use the mask (protect against future chg).
*/
#define MLX4_RESERVED_QKEY_BASE (0xFFFF0000)
#define MLX4_RESERVED_QKEY_MASK (0xFFFF0000)
enum {
MLX4_BOARD_ID_LEN = 64
};
enum {
MLX4_DEV_CAP_FLAG_RC = 1 << 0,
MLX4_DEV_CAP_FLAG_UC = 1 << 1,
MLX4_DEV_CAP_FLAG_UD = 1 << 2,
MLX4_DEV_CAP_FLAG_XRC = 1 << 3,
MLX4_DEV_CAP_FLAG_SRQ = 1 << 6,
MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1 << 7,
MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1 << 8,
MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1 << 9,
MLX4_DEV_CAP_FLAG_DPDP = 1 << 12,
MLX4_DEV_CAP_FLAG_RAW_ETY = 1 << 13,
MLX4_DEV_CAP_FLAG_BLH = 1 << 15,
MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1 << 16,
MLX4_DEV_CAP_FLAG_APM = 1 << 17,
MLX4_DEV_CAP_FLAG_ATOMIC = 1 << 18,
MLX4_DEV_CAP_FLAG_RAW_MCAST = 1 << 19,
MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1 << 20,
MLX4_DEV_CAP_FLAG_UD_MCAST = 1 << 21,
MLX4_DEV_CAP_FLAG_IBOE = 1 << 30,
MLX4_DEV_CAP_FLAG_FC_T11 = 1 << 31
MLX4_MAX_NUM_PF = 16,
MLX4_MAX_NUM_VF = 64,
MLX4_MFUNC_MAX = 80,
MLX4_MAX_EQ_NUM = 1024,
MLX4_MFUNC_EQ_NUM = 4,
MLX4_MFUNC_MAX_EQES = 8,
MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1)
};
/* Driver supports 3 diffrent device methods to manage traffic steering:
* -device managed - High level API for ib and eth flow steering. FW is
* managing flow steering tables.
* - B0 steering mode - Common low level API for ib and (if supported) eth.
* - A0 steering mode - Limited low level API for eth. In case of IB,
* B0 mode is in use.
*/
enum {
MLX4_STEERING_MODE_A0,
MLX4_STEERING_MODE_B0,
MLX4_STEERING_MODE_DEVICE_MANAGED
};
static inline const char *mlx4_steering_mode_str(int steering_mode)
{
switch (steering_mode) {
case MLX4_STEERING_MODE_A0:
return "A0 steering";
case MLX4_STEERING_MODE_B0:
return "B0 steering";
case MLX4_STEERING_MODE_DEVICE_MANAGED:
return "Device managed flow steering";
default:
return "Unrecognize steering mode";
}
}
enum {
MLX4_DEV_CAP_FLAG_RC = 1LL << 0,
MLX4_DEV_CAP_FLAG_UC = 1LL << 1,
MLX4_DEV_CAP_FLAG_UD = 1LL << 2,
MLX4_DEV_CAP_FLAG_XRC = 1LL << 3,
MLX4_DEV_CAP_FLAG_SRQ = 1LL << 6,
MLX4_DEV_CAP_FLAG_IPOIB_CSUM = 1LL << 7,
MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8,
MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9,
MLX4_DEV_CAP_FLAG_DPDP = 1LL << 12,
MLX4_DEV_CAP_FLAG_BLH = 1LL << 15,
MLX4_DEV_CAP_FLAG_MEM_WINDOW = 1LL << 16,
MLX4_DEV_CAP_FLAG_APM = 1LL << 17,
MLX4_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
MLX4_DEV_CAP_FLAG_RAW_MCAST = 1LL << 19,
MLX4_DEV_CAP_FLAG_UD_AV_PORT = 1LL << 20,
MLX4_DEV_CAP_FLAG_UD_MCAST = 1LL << 21,
MLX4_DEV_CAP_FLAG_IBOE = 1LL << 30,
MLX4_DEV_CAP_FLAG_UC_LOOPBACK = 1LL << 32,
MLX4_DEV_CAP_FLAG_FCS_KEEP = 1LL << 34,
MLX4_DEV_CAP_FLAG_WOL_PORT1 = 1LL << 37,
MLX4_DEV_CAP_FLAG_WOL_PORT2 = 1LL << 38,
MLX4_DEV_CAP_FLAG_UDP_RSS = 1LL << 40,
MLX4_DEV_CAP_FLAG_VEP_UC_STEER = 1LL << 41,
MLX4_DEV_CAP_FLAG_VEP_MC_STEER = 1LL << 42,
MLX4_DEV_CAP_FLAG_COUNTERS = 1LL << 48,
MLX4_DEV_CAP_FLAG_COUNTERS_EXT = 1LL << 49,
MLX4_DEV_CAP_FLAG_SET_PORT_ETH_SCHED = 1LL << 53,
MLX4_DEV_CAP_FLAG_SENSE_SUPPORT = 1LL << 55,
MLX4_DEV_CAP_FLAG_FAST_DROP = 1LL << 57,
MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
MLX4_DEV_CAP_FLAG_ESWITCH_SUPPORT = 1LL << 60,
MLX4_DEV_CAP_FLAG_64B_EQE = 1LL << 61,
MLX4_DEV_CAP_FLAG_64B_CQE = 1LL << 62
};
enum {
MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0,
MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1,
MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2,
MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3
};
enum {
MLX4_DEV_CAP_64B_EQE_ENABLED = 1LL << 0,
MLX4_DEV_CAP_64B_CQE_ENABLED = 1LL << 1
};
enum {
MLX4_USER_DEV_CAP_64B_CQE = 1L << 0
};
enum {
MLX4_FUNC_CAP_64B_EQE_CQE = 1L << 0
};
/* bit enums for an 8-bit flags field indicating special use
* QPs which require special handling in qp_reserve_range.
* Currently, this only includes QPs used by the ETH interface,
* where we expect to use blueflame. These QPs must not have
* bits 6 and 7 set in their qp number.
*
* This enum may use only bits 0..7.
*/
enum {
MLX4_RESERVE_BF_QP = 1 << 7,
};
#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
enum {
MLX4_BMME_FLAG_LOCAL_INV = 1 << 6,
MLX4_BMME_FLAG_REMOTE_INV = 1 << 7,
@ -102,7 +231,14 @@ enum mlx4_event {
MLX4_EVENT_TYPE_PORT_CHANGE = 0x09,
MLX4_EVENT_TYPE_EQ_OVERFLOW = 0x0f,
MLX4_EVENT_TYPE_ECC_DETECT = 0x0e,
MLX4_EVENT_TYPE_CMD = 0x0a
MLX4_EVENT_TYPE_CMD = 0x0a,
MLX4_EVENT_TYPE_VEP_UPDATE = 0x19,
MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18,
MLX4_EVENT_TYPE_OP_REQUIRED = 0x1a,
MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b,
MLX4_EVENT_TYPE_FLR_EVENT = 0x1c,
MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d,
MLX4_EVENT_TYPE_NONE = 0xff,
};
enum {
@ -110,6 +246,29 @@ enum {
MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4
};
enum {
MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0,
};
enum slave_port_state {
SLAVE_PORT_DOWN = 0,
SLAVE_PENDING_UP,
SLAVE_PORT_UP,
};
enum slave_port_gen_event {
SLAVE_PORT_GEN_EVENT_DOWN = 0,
SLAVE_PORT_GEN_EVENT_UP,
SLAVE_PORT_GEN_EVENT_NONE,
};
enum slave_port_state_event {
MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
MLX4_PORT_STATE_DEV_EVENT_PORT_UP,
MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
};
enum {
MLX4_PERM_LOCAL_READ = 1 << 10,
MLX4_PERM_LOCAL_WRITE = 1 << 11,
@ -126,7 +285,6 @@ enum {
MLX4_OPCODE_SEND = 0x0a,
MLX4_OPCODE_SEND_IMM = 0x0b,
MLX4_OPCODE_LSO = 0x0e,
MLX4_OPCODE_BIG_LSO = 0x2e,
MLX4_OPCODE_RDMA_READ = 0x10,
MLX4_OPCODE_ATOMIC_CS = 0x11,
MLX4_OPCODE_ATOMIC_FA = 0x12,
@ -150,14 +308,26 @@ enum {
MLX4_STAT_RATE_OFFSET = 5
};
enum mlx4_protocol {
MLX4_PROT_IB_IPV6 = 0,
MLX4_PROT_ETH,
MLX4_PROT_IB_IPV4,
MLX4_PROT_FCOE
};
enum {
MLX4_MTT_FLAG_PRESENT = 1
};
enum {
MLX4_MAX_MTT_SHIFT = 31
};
enum mlx4_qp_region {
MLX4_QP_REGION_FW = 0,
MLX4_QP_REGION_ETH_ADDR,
MLX4_QP_REGION_FC_ADDR,
MLX4_QP_REGION_FC_EXCH,
MLX4_NUM_QP_REGION
};
@ -173,25 +343,56 @@ enum mlx4_special_vlan_idx {
MLX4_VLAN_MISS_IDX,
MLX4_VLAN_REGULAR
};
#define MLX4_LEAST_ATTACHED_VECTOR 0xffffffff
enum {
MLX4_CUNTERS_DISABLED,
MLX4_CUNTERS_BASIC,
MLX4_CUNTERS_EXT
enum mlx4_steer_type {
MLX4_MC_STEER = 0,
MLX4_UC_STEER,
MLX4_NUM_STEERS
};
enum {
MAX_FAST_REG_PAGES = 511,
MLX4_NUM_FEXCH = 64 * 1024,
};
enum {
MLX4_MAX_FAST_REG_PAGES = 511,
};
enum {
MLX4_DEV_PMC_SUBTYPE_GUID_INFO = 0x14,
MLX4_DEV_PMC_SUBTYPE_PORT_INFO = 0x15,
MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE = 0x16,
};
/* Port mgmt change event handling */
enum {
MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK = 1 << 0,
MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK = 1 << 1,
MLX4_EQ_PORT_INFO_LID_CHANGE_MASK = 1 << 2,
MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK = 1 << 3,
MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4,
};
#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
{
return (major << 32) | (minor << 16) | subminor;
}
struct mlx4_phys_caps {
u32 gid_phys_table_len[MLX4_MAX_PORTS + 1];
u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1];
u32 num_phys_eqs;
u32 base_sqpn;
u32 base_proxy_sqpn;
u32 base_tunnel_sqpn;
};
struct mlx4_caps {
u64 fw_ver;
u32 function;
int num_ports;
int vl_cap[MLX4_MAX_PORTS + 1];
int ib_mtu_cap[MLX4_MAX_PORTS + 1];
@ -206,6 +407,7 @@ struct mlx4_caps {
u64 trans_code[MLX4_MAX_PORTS + 1];
int local_ca_ack_delay;
int num_uars;
u32 uar_page_size;
int bf_reg_size;
int bf_regs_per_page;
int max_sq_sg;
@ -216,7 +418,10 @@ struct mlx4_caps {
int max_rq_desc_sz;
int max_qp_init_rdma;
int max_qp_dest_rdma;
int sqp_start;
u32 *qp0_proxy;
u32 *qp1_proxy;
u32 *qp0_tunnel;
u32 *qp1_tunnel;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
@ -227,9 +432,10 @@ struct mlx4_caps {
int num_eqs;
int reserved_eqs;
int num_comp_vectors;
int comp_pool;
int num_mpts;
int num_mtt_segs;
int mtts_per_seg;
int max_fmr_maps;
int num_mtts;
int fmr_reserved_mtts;
int reserved_mtts;
int reserved_mrws;
@ -238,36 +444,47 @@ struct mlx4_caps {
int num_amgms;
int reserved_mcgs;
int num_qp_per_mgm;
int steering_mode;
int num_pds;
int reserved_pds;
int mtt_entry_sz;
int reserved_xrcds;
int max_xrcds;
int reserved_xrcds;
int mtt_entry_sz;
u32 max_msg_sz;
u32 page_size_cap;
u64 flags;
u64 flags2;
u32 bmme_flags;
u32 reserved_lkey;
u16 stat_rate_support;
int udp_rss;
int loopback_support;
int wol;
u8 cq_timestamp;
u8 port_width_cap[MLX4_MAX_PORTS + 1];
int max_gso_sz;
int max_rss_tbl_sz;
int reserved_qps_cnt[MLX4_NUM_QP_REGION];
int reserved_qps;
int reserved_qps_base[MLX4_NUM_QP_REGION];
int log_num_macs;
int log_num_vlans;
int log_num_prios;
enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
u8 supported_type[MLX4_MAX_PORTS + 1];
enum mlx4_port_type port_mask[MLX4_MAX_PORTS + 1];
u8 suggested_type[MLX4_MAX_PORTS + 1];
u8 default_sense[MLX4_MAX_PORTS + 1];
u32 port_mask[MLX4_MAX_PORTS + 1];
enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1];
u8 counters_mode;
u32 max_counters;
u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
u16 sqp_demux;
u32 sync_qp;
u32 eqe_size;
u32 cqe_size;
u8 eqe_factor;
u32 userspace_caps; /* userspace must be aware to */
u32 function_caps; /* functions must be aware to */
u8 fast_drop;
u16 hca_core_clock;
u32 max_basic_counters;
u32 max_ext_counters;
u32 mc_promisc_mode;
u32 max_extended_counters;
};
struct mlx4_buf_list {
@ -284,7 +501,7 @@ struct mlx4_buf {
};
struct mlx4_mtt {
u32 first_seg;
u32 offset;
int order;
int page_shift;
};
@ -375,6 +592,8 @@ struct mlx4_cq {
atomic_t refcount;
struct completion free;
int eqn;
u16 irq;
};
struct mlx4_qp {
@ -432,52 +651,168 @@ union mlx4_ext_av {
struct mlx4_eth_av eth;
};
struct mlx4_counters {
__be32 counter_mode;
__be32 num_ifc;
u32 reserved[2];
__be64 rx_frames;
__be64 rx_bytes;
__be64 tx_frames;
__be64 tx_bytes;
struct mlx4_if_stat_control {
u8 reserved1[3];
/* Extended counters enabled */
u8 cnt_mode;
/* Number of interfaces */
__be32 num_of_if;
__be32 reserved[2];
};
struct mlx4_counters_ext {
__be32 counter_mode;
__be32 num_ifc;
u32 reserved[2];
__be64 rx_uni_frames;
__be64 rx_uni_bytes;
__be64 rx_mcast_frames;
__be64 rx_mcast_bytes;
__be64 rx_bcast_frames;
__be64 rx_bcast_bytes;
__be64 rx_nobuf_frames;
__be64 rx_nobuf_bytes;
__be64 rx_err_frames;
__be64 rx_err_bytes;
__be64 tx_uni_frames;
__be64 tx_uni_bytes;
__be64 tx_mcast_frames;
__be64 tx_mcast_bytes;
__be64 tx_bcast_frames;
__be64 tx_bcast_bytes;
__be64 tx_nobuf_frames;
__be64 tx_nobuf_bytes;
__be64 tx_err_frames;
__be64 tx_err_bytes;
struct mlx4_if_stat_basic {
struct mlx4_if_stat_control control;
struct {
__be64 IfRxFrames;
__be64 IfRxOctets;
__be64 IfTxFrames;
__be64 IfTxOctets;
} counters[];
};
#define MLX4_IF_STAT_BSC_SZ(ports)(sizeof(struct mlx4_if_stat_extended) +\
sizeof(((struct mlx4_if_stat_extended *)0)->\
counters[0]) * ports)
struct mlx4_if_stat_extended {
struct mlx4_if_stat_control control;
struct {
__be64 IfRxUnicastFrames;
__be64 IfRxUnicastOctets;
__be64 IfRxMulticastFrames;
__be64 IfRxMulticastOctets;
__be64 IfRxBroadcastFrames;
__be64 IfRxBroadcastOctets;
__be64 IfRxNoBufferFrames;
__be64 IfRxNoBufferOctets;
__be64 IfRxErrorFrames;
__be64 IfRxErrorOctets;
__be32 reserved[39];
__be64 IfTxUnicastFrames;
__be64 IfTxUnicastOctets;
__be64 IfTxMulticastFrames;
__be64 IfTxMulticastOctets;
__be64 IfTxBroadcastFrames;
__be64 IfTxBroadcastOctets;
__be64 IfTxDroppedFrames;
__be64 IfTxDroppedOctets;
__be64 IfTxRequestedFramesSent;
__be64 IfTxGeneratedFramesSent;
__be64 IfTxTsoOctets;
} __packed counters[];
};
#define MLX4_IF_STAT_EXT_SZ(ports) (sizeof(struct mlx4_if_stat_extended) +\
sizeof(((struct mlx4_if_stat_extended *)\
0)->counters[0]) * ports)
union mlx4_counter {
struct mlx4_if_stat_control control;
struct mlx4_if_stat_basic basic;
struct mlx4_if_stat_extended ext;
};
#define MLX4_IF_STAT_SZ(ports) MLX4_IF_STAT_EXT_SZ(ports)
struct mlx4_quotas {
int qp;
int cq;
int srq;
int mpt;
int mtt;
int counter;
int xrcd;
};
struct mlx4_dev {
struct pci_dev *pdev;
unsigned long flags;
unsigned long num_slaves;
struct mlx4_caps caps;
struct mlx4_phys_caps phys_caps;
struct mlx4_quotas quotas;
struct radix_tree_root qp_table_tree;
struct radix_tree_root srq_table_tree;
u32 rev_id;
u8 rev_id;
char board_id[MLX4_BOARD_ID_LEN];
int num_vfs;
int numa_node;
int oper_log_mgm_entry_size;
u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
};
struct mlx4_eqe {
u8 reserved1;
u8 type;
u8 reserved2;
u8 subtype;
union {
u32 raw[6];
struct {
__be32 cqn;
} __packed comp;
struct {
u16 reserved1;
__be16 token;
u32 reserved2;
u8 reserved3[3];
u8 status;
__be64 out_param;
} __packed cmd;
struct {
__be32 qpn;
} __packed qp;
struct {
__be32 srqn;
} __packed srq;
struct {
__be32 cqn;
u32 reserved1;
u8 reserved2[3];
u8 syndrome;
} __packed cq_err;
struct {
u32 reserved1[2];
__be32 port;
} __packed port_change;
struct {
#define COMM_CHANNEL_BIT_ARRAY_SIZE 4
u32 reserved;
u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
} __packed comm_channel_arm;
struct {
u8 port;
u8 reserved[3];
__be64 mac;
} __packed mac_update;
struct {
__be32 slave_id;
} __packed flr_event;
struct {
__be16 current_temperature;
__be16 warning_threshold;
} __packed warming;
struct {
u8 reserved[3];
u8 port;
union {
struct {
__be16 mstr_sm_lid;
__be16 port_lid;
__be32 changed_attr;
u8 reserved[3];
u8 mstr_sm_sl;
__be64 gid_prefix;
} __packed port_info;
struct {
__be32 block_ptr;
__be32 tbl_entries_mask;
} __packed tbl_change_info;
} params;
} __packed port_mgmt_change;
} event;
u8 slave_id;
u8 reserved3[2];
u8 owner;
} __packed;
struct mlx4_init_port_param {
int set_guid0;
int set_node_guid;
@ -492,29 +827,71 @@ struct mlx4_init_port_param {
u64 si_guid;
};
static inline void mlx4_query_steer_cap(struct mlx4_dev *dev, int *log_mac,
int *log_vlan, int *log_prio)
{
*log_mac = dev->caps.log_num_macs;
*log_vlan = dev->caps.log_num_vlans;
*log_prio = dev->caps.log_num_prios;
}
#define mlx4_foreach_port(port, dev, type) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if ((type) == (dev)->caps.port_mask[(port)])
#define mlx4_foreach_non_ib_transport_port(port, dev) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
#define mlx4_foreach_ib_transport_port(port, dev) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
#define MLX4_INVALID_SLAVE_ID 0xFF
void handle_port_mgmt_change_event(struct work_struct *work);
static inline int mlx4_master_func_num(struct mlx4_dev *dev)
{
return dev->caps.function;
}
static inline int mlx4_is_master(struct mlx4_dev *dev)
{
return dev->flags & MLX4_FLAG_MASTER;
}
static inline int mlx4_num_reserved_sqps(struct mlx4_dev *dev)
{
return dev->phys_caps.base_sqpn + 8 +
16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev);
}
static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
{
return (qpn < dev->phys_caps.base_sqpn + 8 +
16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev));
}
static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)
{
int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8;
if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8)
return 1;
return 0;
}
static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
{
return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER);
}
static inline int mlx4_is_slave(struct mlx4_dev *dev)
{
return dev->flags & MLX4_FLAG_SLAVE;
}
int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
struct mlx4_buf *buf);
void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
{
if (buf->direct.buf != NULL)
if (BITS_PER_LONG == 64 || buf->nbufs == 1)
return buf->direct.buf + offset;
else
return buf->page_list[offset >> PAGE_SHIFT].buf +
@ -523,31 +900,21 @@ static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node);
void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
struct mlx4_mtt *mtt);
void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt);
int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
int mlx4_mr_reserve_range(struct mlx4_dev *dev, int cnt, int align, u32 *base_mridx);
void mlx4_mr_release_range(struct mlx4_dev *dev, u32 base_mridx, int cnt);
int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
u64 iova, u64 size, u32 access, int npages,
int page_shift, struct mlx4_mr *mr);
int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
int npages, int page_shift, struct mlx4_mr *mr);
void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr);
void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr);
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr);
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@ -565,16 +932,17 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
unsigned vector, int collapsed);
unsigned vector, int collapsed, int timestamp_en);
void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
int *base, u8 bf_qp);
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq);
void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
@ -583,41 +951,185 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
int block_mcast_loopback, enum mlx4_protocol prot);
int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
enum mlx4_protocol prot);
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
int block_mcast_loopback, enum mlx4_mcast_prot prot);
u8 port, int block_mcast_loopback,
enum mlx4_protocol protocol, u64 *reg_id);
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
enum mlx4_mcast_prot prot);
enum mlx4_protocol protocol, u64 reg_id);
int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *index);
void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int index);
enum {
MLX4_DOMAIN_UVERBS = 0x1000,
MLX4_DOMAIN_ETHTOOL = 0x2000,
MLX4_DOMAIN_RFS = 0x3000,
MLX4_DOMAIN_NIC = 0x5000,
};
enum mlx4_net_trans_rule_id {
MLX4_NET_TRANS_RULE_ID_ETH = 0,
MLX4_NET_TRANS_RULE_ID_IB,
MLX4_NET_TRANS_RULE_ID_IPV6,
MLX4_NET_TRANS_RULE_ID_IPV4,
MLX4_NET_TRANS_RULE_ID_TCP,
MLX4_NET_TRANS_RULE_ID_UDP,
MLX4_NET_TRANS_RULE_NUM, /* should be last */
};
extern const u16 __sw_id_hw[];
static inline int map_hw_to_sw_id(u16 header_id)
{
int i;
for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) {
if (header_id == __sw_id_hw[i])
return i;
}
return -EINVAL;
}
enum mlx4_net_trans_promisc_mode {
MLX4_FS_REGULAR = 0,
MLX4_FS_ALL_DEFAULT = 1,
MLX4_FS_MC_DEFAULT = 3,
MLX4_FS_UC_SNIFFER = 4,
MLX4_FS_MC_SNIFFER = 5,
};
struct mlx4_spec_eth {
u8 dst_mac[6];
u8 dst_mac_msk[6];
u8 src_mac[6];
u8 src_mac_msk[6];
u8 ether_type_enable;
__be16 ether_type;
__be16 vlan_id_msk;
__be16 vlan_id;
};
struct mlx4_spec_tcp_udp {
__be16 dst_port;
__be16 dst_port_msk;
__be16 src_port;
__be16 src_port_msk;
};
struct mlx4_spec_ipv4 {
__be32 dst_ip;
__be32 dst_ip_msk;
__be32 src_ip;
__be32 src_ip_msk;
};
struct mlx4_spec_ib {
__be32 r_u_qpn;
__be32 qpn_msk;
u8 dst_gid[16];
u8 dst_gid_msk[16];
};
struct mlx4_spec_list {
struct list_head list;
enum mlx4_net_trans_rule_id id;
union {
struct mlx4_spec_eth eth;
struct mlx4_spec_ib ib;
struct mlx4_spec_ipv4 ipv4;
struct mlx4_spec_tcp_udp tcp_udp;
};
};
enum mlx4_net_trans_hw_rule_queue {
MLX4_NET_TRANS_Q_FIFO,
MLX4_NET_TRANS_Q_LIFO,
};
struct mlx4_net_trans_rule {
struct list_head list;
enum mlx4_net_trans_hw_rule_queue queue_mode;
bool exclusive;
bool allow_loopback;
enum mlx4_net_trans_promisc_mode promisc_mode;
u8 port;
u16 priority;
u32 qpn;
};
int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
enum mlx4_net_trans_promisc_mode mode);
int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
enum mlx4_net_trans_promisc_mode mode);
int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
int mlx4_get_base_qpn(struct mlx4_dev *dev, u8 port);
int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap);
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
u8 promisc);
int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
u8 *pg, u16 *ratelimit);
int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index);
void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
int mlx4_map_phys_fmr_fbo(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u64 *page_list, int npages, u64 iova, u32 fbo,
u32 len, u32 *lkey, u32 *rkey, int same_key);
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
int npages, u64 iova, u32 *lkey, u32 *rkey);
int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
u32 access, int max_pages, int max_maps,
u8 page_shift, struct mlx4_fmr *fmr);
int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
int max_maps, u8 page_shift, struct mlx4_fmr *fmr);
int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
u32 *lkey, u32 *rkey);
int mlx4_fmr_free_reserved(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
int mlx4_SYNC_TPT(struct mlx4_dev *dev);
int mlx4_query_diag_counters(struct mlx4_dev *mlx4_dev, int array_length,
u8 op_modifier, u32 in_offset[], u32 counter_out[]);
int mlx4_test_interrupts(struct mlx4_dev *dev);
u8 op_modifier, u32 in_offset[],
u32 counter_out[]);
void mlx4_get_fc_t11_settings(struct mlx4_dev *dev, int *enable_pre_t11, int *t11_supported);
int mlx4_test_interrupts(struct mlx4_dev *dev);
int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector);
void mlx4_release_eq(struct mlx4_dev *dev, int vec);
int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
int mlx4_flow_attach(struct mlx4_dev *dev,
struct mlx4_net_trans_rule *rule, u64 *reg_id);
int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
int i, int val);
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey);
int mlx4_is_slave_active(struct mlx4_dev *dev, int slave);
int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port);
int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port);
int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr);
int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change);
enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port);
int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event);
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, int *slave_id);
int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, u8 *gid);
int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn, u32 max_range_qpn);
cycle_t mlx4_read_clock(struct mlx4_dev *dev);
#endif /* MLX4_DEVICE_H */

View File

@ -33,15 +33,22 @@
#ifndef MLX4_DRIVER_H
#define MLX4_DRIVER_H
#include <linux/device.h>
#include <linux/mlx4/device.h>
struct mlx4_dev;
#define MLX4_MAC_MASK 0xffffffffffffULL
#define MLX4_BE_SHORT_MASK cpu_to_be16(0xffff)
#define MLX4_BE_WORD_MASK cpu_to_be32(0xffffffff)
enum mlx4_dev_event {
MLX4_DEV_EVENT_CATASTROPHIC_ERROR,
MLX4_DEV_EVENT_PORT_UP,
MLX4_DEV_EVENT_PORT_DOWN,
MLX4_DEV_EVENT_PORT_REINIT,
MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
MLX4_DEV_EVENT_SLAVE_INIT,
MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
};
enum mlx4_query_reply {
@ -49,11 +56,6 @@ enum mlx4_query_reply {
MLX4_QUERY_MINE_NOPORT = 0
};
enum mlx4_prot {
MLX4_PROT_IB,
MLX4_PROT_EN,
};
enum mlx4_mcast_prot {
MLX4_MCAST_PROT_IB = 0,
MLX4_MCAST_PROT_EN = 1,
@ -63,20 +65,32 @@ struct mlx4_interface {
void * (*add) (struct mlx4_dev *dev);
void (*remove)(struct mlx4_dev *dev, void *context);
void (*event) (struct mlx4_dev *dev, void *context,
enum mlx4_dev_event event, int port);
void * (*get_prot_dev) (struct mlx4_dev *dev, void *context, u8 port);
enum mlx4_prot protocol;
enum mlx4_dev_event event, unsigned long param);
void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port);
enum mlx4_query_reply (*query) (void *context, void *);
struct list_head list;
enum mlx4_protocol protocol;
};
int mlx4_register_interface(struct mlx4_interface *intf);
void mlx4_unregister_interface(struct mlx4_interface *intf);
void *mlx4_get_prot_dev(struct mlx4_dev *dev, enum mlx4_prot proto, int port);
struct mlx4_dev *mlx4_query_interface(void *, int *port);
void mlx4_set_iboe_counter(struct mlx4_dev *dev, int index, u8 port);
int mlx4_get_iboe_counter(struct mlx4_dev *dev, u8 port);
void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port);
#ifndef ETH_ALEN
#define ETH_ALEN 6
#endif
static inline u64 mlx4_mac_to_u64(u8 *addr)
{
u64 mac = 0;
int i;
for (i = 0; i < ETH_ALEN; i++) {
mac <<= 8;
mac |= addr[i];
}
return mac;
}
#endif /* MLX4_DRIVER_H */

View File

@ -39,6 +39,15 @@
#define MLX4_INVALID_LKEY 0x100
enum ib_m_qp_attr_mask {
IB_M_EXT_CLASS_1 = 1 << 28,
IB_M_EXT_CLASS_2 = 1 << 29,
IB_M_EXT_CLASS_3 = 1 << 30,
IB_M_QP_MOD_VEND_MASK = (IB_M_EXT_CLASS_1 | IB_M_EXT_CLASS_2 |
IB_M_EXT_CLASS_3)
};
enum mlx4_qp_optpar {
MLX4_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX4_QP_OPTPAR_RRE = 1 << 1,
@ -95,11 +104,42 @@ enum {
MLX4_QP_BIT_RWE = 1 << 14,
MLX4_QP_BIT_RAE = 1 << 13,
MLX4_QP_BIT_RIC = 1 << 4,
MLX4_QP_BIT_COLL_SYNC_RQ = 1 << 2,
MLX4_QP_BIT_COLL_SYNC_SQ = 1 << 1,
MLX4_QP_BIT_COLL_MASTER = 1 << 0
};
enum {
MLX4_RSS_HASH_XOR = 0,
MLX4_RSS_HASH_TOP = 1,
MLX4_RSS_UDP_IPV6 = 1 << 0,
MLX4_RSS_UDP_IPV4 = 1 << 1,
MLX4_RSS_TCP_IPV6 = 1 << 2,
MLX4_RSS_IPV6 = 1 << 3,
MLX4_RSS_TCP_IPV4 = 1 << 4,
MLX4_RSS_IPV4 = 1 << 5,
/* offset of mlx4_rss_context within mlx4_qp_context.pri_path */
MLX4_RSS_OFFSET_IN_QPC_PRI_PATH = 0x24,
/* offset of being RSS indirection QP within mlx4_qp_context.flags */
MLX4_RSS_QPC_FLAG_OFFSET = 13,
};
struct mlx4_rss_context {
__be32 base_qpn;
__be32 default_qpn;
u16 reserved;
u8 hash_fn;
u8 flags;
__be32 rss_key[10];
__be32 base_qpn_udp;
};
struct mlx4_qp_path {
u8 fl;
u8 reserved1[2];
u8 reserved1[1];
u8 disable_pkey_check;
u8 pkey_index;
u8 counter_index;
u8 grh_mylmc;
@ -112,7 +152,8 @@ struct mlx4_qp_path {
u8 rgid[16];
u8 sched_queue;
u8 vlan_index;
u8 reserved3[2];
u8 feup;
u8 reserved3;
u8 reserved4[2];
u8 dmac[6];
};
@ -153,16 +194,7 @@ struct mlx4_qp_context {
u8 reserved4[2];
u8 mtt_base_addr_h;
__be32 mtt_base_addr_l;
u8 VE;
u8 reserved5;
__be16 VFT_id_prio;
u8 reserved6;
u8 exch_size;
__be16 exch_base;
u8 VFT_hop_cnt;
u8 my_fc_id_idx;
__be16 reserved7;
u32 reserved8[7];
u32 reserved5[10];
};
/* Which firmware version adds support for NEC (NoErrorCompletion) bit */
@ -192,8 +224,12 @@ struct mlx4_wqe_ctrl_seg {
* [4] IP checksum
* [3:2] C (generate completion queue entry)
* [1] SE (solicited event)
* [0] FL (force loopback)
*/
__be32 srcrb_flags;
union {
__be32 srcrb_flags;
__be16 srcrb_flags16[2];
};
/*
* imm is immediate data for send/RDMA write w/ immediate;
* also invalidation key for send with invalidate; input
@ -204,15 +240,15 @@ struct mlx4_wqe_ctrl_seg {
enum {
MLX4_WQE_MLX_VL15 = 1 << 17,
MLX4_WQE_MLX_SLR = 1 << 16,
MLX4_WQE_MLX_ICRC = 1 << 4
MLX4_WQE_MLX_SLR = 1 << 16
};
struct mlx4_wqe_mlx_seg {
u8 owner;
u8 reserved1[2];
u8 opcode;
u8 reserved2[3];
__be16 sched_prio;
u8 reserved2;
u8 size;
/*
* [17] VL15
@ -338,9 +374,6 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));
}
struct mlx4_qp *mlx4_qp_lookup_lock(struct mlx4_dev *dev, u32 qpn);
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
int mlx4_qp_get_region(struct mlx4_dev *dev, enum mlx4_qp_region region,
int *base_qpn, int *cnt);
#endif /* MLX4_QP_H */

View File

@ -33,22 +33,10 @@
#ifndef MLX4_SRQ_H
#define MLX4_SRQ_H
#include <linux/types.h>
#include <linux/mlx4/device.h>
struct mlx4_wqe_srq_next_seg {
u16 reserved1;
__be16 next_wqe_index;
u32 reserved2[3];
};
void mlx4_srq_invalidate(struct mlx4_dev *dev, struct mlx4_srq *srq);
void mlx4_srq_remove(struct mlx4_dev *dev, struct mlx4_srq *srq);
static inline struct mlx4_srq *__mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn)
{
return radix_tree_lookup(&dev->srq_table_tree,
srqn & (dev->caps.num_srqs - 1));
}
#endif /* MLX4_SRQ_H */

View File

@ -87,6 +87,9 @@ param_sysinit(struct kernel_param *param)
#define module_param(var, type, mode) \
module_param_named(var, var, type, mode)
#define module_param_array(var, type, addr_argc, mode) \
module_param_named(var, var, type, mode)
#define MODULE_PARM_DESC(name, desc)
static inline int

View File

@ -72,6 +72,9 @@ struct pci_device_id {
#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)
#define PCI_VDEVICE(_vendor, _device) \
.vendor = PCI_VENDOR_ID_##_vendor, .device = (_device), \
@ -93,14 +96,18 @@ struct pci_device_id {
struct pci_dev;
struct pci_driver {
struct list_head links;
char *name;
struct pci_device_id *id_table;
int (*probe)(struct pci_dev *dev, const struct pci_device_id *id);
void (*remove)(struct pci_dev *dev);
int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */
int (*resume) (struct pci_dev *dev); /* Device woken up */
driver_t driver;
devclass_t bsdclass;
struct pci_error_handlers *err_handler;
};
extern struct list_head pci_drivers;
@ -117,6 +124,9 @@ struct pci_dev {
uint16_t device;
uint16_t vendor;
unsigned int irq;
unsigned int devfn;
u8 revision;
struct pci_devinfo *bus; /* bus this device is on, equivalent to linux struct pci_bus */
};
static inline struct resource_list_entry *
@ -296,6 +306,7 @@ pci_disable_msix(struct pci_dev *pdev)
#define PCI_CAP_ID_EXP PCIY_EXPRESS
#define PCI_CAP_ID_PCIX PCIY_PCIX
static inline int
pci_find_capability(struct pci_dev *pdev, int capid)
{
@ -306,6 +317,26 @@ pci_find_capability(struct pci_dev *pdev, int capid)
return (reg);
}
/**
* pci_pcie_cap - get the saved PCIe capability offset
* @dev: PCI device
*
* PCIe capability offset is calculated at PCI device initialization
* time and saved in the data structure. This function returns saved
* PCIe capability offset. Using this instead of pci_find_capability()
* reduces unnecessary search in the PCI configuration space. If you
* need to calculate PCIe capability offset from raw device for some
* reasons, please use pci_find_capability() instead.
*/
static inline int pci_pcie_cap(struct pci_dev *dev)
{
return pci_find_capability(dev, PCI_CAP_ID_EXP);
}
static inline int
pci_read_config_byte(struct pci_dev *pdev, int where, u8 *val)
{
@ -529,6 +560,30 @@ pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, int nreq)
return (0);
}
static inline int pci_channel_offline(struct pci_dev *pdev)
{
return false;
}
static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{
return -ENODEV;
}
static inline void pci_disable_sriov(struct pci_dev *dev)
{
}
/**
* DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table
* @_table: device table name
*
* This macro is used to create a struct pci_device_id array (a device table)
* in a generic manner.
*/
#define DEFINE_PCI_DEVICE_TABLE(_table) \
const struct pci_device_id _table[] __devinitdata
/* XXX This should not be necessary. */
#define pcix_set_mmrbc(d, v) 0
#define pcix_get_max_mmrbc(d) 0
@ -578,5 +633,57 @@ pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, int nreq)
#define pci_unmap_len dma_unmap_len
#define pci_unmap_len_set dma_unmap_len_set
typedef unsigned int __bitwise pci_channel_state_t;
typedef unsigned int __bitwise pci_ers_result_t;
enum pci_channel_state {
/* I/O channel is in normal state */
pci_channel_io_normal = (__force pci_channel_state_t) 1,
/* I/O to channel is blocked */
pci_channel_io_frozen = (__force pci_channel_state_t) 2,
/* PCI card is dead */
pci_channel_io_perm_failure = (__force pci_channel_state_t) 3,
};
enum pci_ers_result {
/* no result/none/not supported in device driver */
PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1,
/* Device driver can recover without slot reset */
PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2,
/* Device driver wants slot to be reset. */
PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3,
/* Device has completely failed, is unrecoverable */
PCI_ERS_RESULT_DISCONNECT = (__force pci_ers_result_t) 4,
/* Device driver is fully recovered and operational */
PCI_ERS_RESULT_RECOVERED = (__force pci_ers_result_t) 5,
};
/* PCI bus error event callbacks */
struct pci_error_handlers {
/* PCI bus error detected on this device */
pci_ers_result_t (*error_detected)(struct pci_dev *dev,
enum pci_channel_state error);
/* MMIO has been re-enabled, but not DMA */
pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev);
/* PCI Express link has been reset */
pci_ers_result_t (*link_reset)(struct pci_dev *dev);
/* PCI slot has been reset */
pci_ers_result_t (*slot_reset)(struct pci_dev *dev);
/* Device driver may resume normal operations */
void (*resume)(struct pci_dev *dev);
};
#endif /* _LINUX_PCI_H_ */

View File

@ -105,6 +105,10 @@ sysctl_handle_attr(SYSCTL_HANDLER_ARGS)
/* Trim trailing newline. */
buf[len] = '\0';
}
/* Trim trailing newline. */
len--;
((char*)buf)[len] = '\0';
}
/* Leave one trailing byte to append a newline. */
@ -185,4 +189,6 @@ sysfs_remove_dir(struct kobject *kobj)
sysctl_remove_oid(kobj->oidp, 1, 1);
}
#define sysfs_attr_init(attr) do {} while(0)
#endif /* _LINUX_SYSFS_H_ */

View File

@ -45,6 +45,8 @@ typedef _Bool bool;
#define false FALSE
#endif
typedef u64 phys_addr_t;
typedef unsigned long kernel_ulong_t;
typedef unsigned int uint;
typedef unsigned gfp_t;

View File

@ -80,7 +80,7 @@ do { \
callout_init(&(_work)->timer, CALLOUT_MPSAFE); \
} while (0)
#define INIT_DELAYED_WORK_DEFERRABLE INIT_DELAYED_WORK
#define INIT_DEFERRABLE_WORK INIT_DELAYED_WORK
#define schedule_work(work) \
do { \
@ -121,6 +121,14 @@ queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work,
return (!pending);
}
static inline bool schedule_delayed_work(struct delayed_work *dwork,
unsigned long delay)
{
struct workqueue_struct wq;
wq.taskqueue = taskqueue_thread;
return queue_delayed_work(&wq, dwork, delay);
}
static inline struct workqueue_struct *
_create_workqueue_common(char *name, int cpus)
{
@ -190,4 +198,15 @@ cancel_delayed_work(struct delayed_work *work)
return 0;
}
static inline int
cancel_delayed_work_sync(struct delayed_work *work)
{
callout_drain(&work->timer);
if (work->work.taskqueue &&
taskqueue_cancel(work->work.taskqueue, &work->work.work_task, NULL))
taskqueue_drain(work->work.taskqueue, &work->work.work_task);
return 0;
}
#endif /* _LINUX_WORKQUEUE_H_ */

View File

@ -38,6 +38,9 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_sa.h>
/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
extern struct class cm_class;
enum ib_cm_state {
IB_CM_IDLE,
IB_CM_LISTEN,
@ -259,6 +262,18 @@ struct ib_cm_event {
void *private_data;
};
#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
/**
* ib_cm_handler - User-defined callback to process communication events.
* @cm_id: Communication identifier associated with the reported event.

View File

@ -151,7 +151,7 @@ struct ib_rmpp_hdr {
typedef u64 __bitwise ib_sa_comp_mask;
#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << n))
#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
/*
* ib_sa_hdr and ib_sa_mad structures must be packed because they have

Some files were not shown because too many files have changed in this diff Show More