Update mlx4ib(4) to Linux 4.9.

Sponsored by:	Mellanox Technologies
This commit is contained in:
Hans Petter Selasky 2017-11-13 10:49:18 +00:00
parent 059ecd56d0
commit 8cc487045e
17 changed files with 3997 additions and 3988 deletions

View File

@ -4550,8 +4550,6 @@ dev/mlx4/mlx4_ib/mlx4_ib_mad.c optional mlx4ib pci ofed \
compile-with "${OFED_C}"
dev/mlx4/mlx4_ib/mlx4_ib_main.c optional mlx4ib pci ofed \
compile-with "${OFED_C}"
dev/mlx4/mlx4_ib/mlx4_ib_exp.c optional mlx4ib pci ofed \
compile-with "${OFED_C}"
dev/mlx4/mlx4_ib/mlx4_ib_mr.c optional mlx4ib pci ofed \
compile-with "${OFED_C}"
dev/mlx4/mlx4_ib/mlx4_ib_qp.c optional mlx4ib pci ofed \

View File

@ -1,46 +0,0 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_EXP_H
#define MLX4_EXP_H
#include <rdma/ib_verbs_exp.h>
#include "mlx4_ib.h"
struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd,
struct ib_exp_qp_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_exp_query_device(struct ib_device *ibdev,
struct ib_exp_device_attr *props);
#endif /* MLX4_EXP_H */

View File

@ -41,6 +41,8 @@
#include <linux/rbtree.h>
#include <linux/notifier.h>
#include <asm/atomic64.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_mad.h>
@ -59,9 +61,6 @@
#define mlx4_ib_warn(ibdev, format, arg...) \
dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
#define mlx4_ib_info(ibdev, format, arg...) \
dev_info((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
enum {
MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
MLX4_IB_MAX_HEADROOM = 2048
@ -75,17 +74,25 @@ extern int mlx4_ib_sm_guid_assign;
extern struct proc_dir_entry *mlx4_mrs_dir_entry;
#define MLX4_IB_UC_STEER_QPN_ALIGN 1
#define MLX4_IB_UC_MAX_NUM_QPS (256 * 1024)
#define MLX4_IB_UC_MAX_NUM_QPS 256
/*
 * Slot identifiers for the hw_bar_info[] array in struct mlx4_ib_ucontext.
 * HW_BAR_COUNT is not a slot of its own: it is the number of slots and is
 * used as that array's dimension.
 * NOTE(review): BF/DB/CLOCK presumably stand for the BlueFlame, doorbell and
 * HW clock regions - confirm against the mmap code.
 */
enum hw_bar_type {
HW_BAR_BF,
HW_BAR_DB,
HW_BAR_CLOCK,
HW_BAR_COUNT
};
#define MLX4_IB_MMAP_CMD_MASK 0xFF
#define MLX4_IB_MMAP_CMD_BITS 8
/*
 * Element type of mlx4_ib_ucontext.hw_bar_info[]: holds a pointer to one
 * vm_area_struct.
 */
struct mlx4_ib_vma_private_data {
struct vm_area_struct *vma;
};
/*
 * mlx4 user context: embeds the core struct ib_ucontext together with the
 * driver's per-context state.
 */
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
/* NOTE(review): presumably serialises access to db_page_list - confirm */
struct mutex db_page_mutex;
/* one entry per enum hw_bar_type value below HW_BAR_COUNT */
struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
};
struct mlx4_ib_pd {
@ -111,11 +118,6 @@ struct mlx4_ib_cq_resize {
int cqe;
};
struct mlx4_shared_mr_info {
int mr_id;
struct ib_umem *umem;
};
struct mlx4_ib_cq {
struct ib_cq ibcq;
struct mlx4_cq mcq;
@ -127,15 +129,22 @@ struct mlx4_ib_cq {
struct ib_umem *umem;
struct ib_umem *resize_umem;
int create_flags;
/* List of qps that it serves.*/
struct list_head send_qp_list;
struct list_head recv_qp_list;
};
#define MLX4_MR_PAGES_ALIGN 0x40
struct mlx4_ib_mr {
struct ib_mr ibmr;
__be64 *pages;
dma_addr_t page_map;
u32 npages;
u32 max_pages;
struct mlx4_mr mmr;
struct ib_umem *umem;
struct mlx4_shared_mr_info *smr_info;
atomic_t invalidated;
struct completion invalidation_comp;
size_t page_map_size;
};
struct mlx4_ib_mw {
@ -143,21 +152,22 @@ struct mlx4_ib_mw {
struct mlx4_mw mmw;
};
struct mlx4_ib_fast_reg_page_list {
struct ib_fast_reg_page_list ibfrpl;
__be64 *mapped_page_list;
dma_addr_t map;
};
struct mlx4_ib_fmr {
struct ib_fmr ibfmr;
struct mlx4_fmr mfmr;
};
#define MAX_REGS_PER_FLOW 2
/*
 * A pair of 64-bit registration IDs; struct mlx4_ib_flow keeps
 * MAX_REGS_PER_FLOW of these.
 * NOTE(review): 'mirror' is presumably the ID of a matching rule registered
 * on a second port - confirm against the flow-steering code.
 */
struct mlx4_flow_reg_id {
u64 id;
u64 mirror;
};
struct mlx4_ib_flow {
struct ib_flow ibflow;
/* translating DMFS verbs sniffer rule to FW API requires two reg IDs */
u64 reg_id[2];
struct mlx4_flow_reg_id reg_id[MAX_REGS_PER_FLOW];
};
struct mlx4_ib_wq {
@ -172,13 +182,18 @@ struct mlx4_ib_wq {
unsigned tail;
};
enum {
MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
};
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_CAP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
MLX4_IB_QP_CAP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
MLX4_IB_QP_CAP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@ -190,13 +205,6 @@ struct mlx4_ib_gid_entry {
u8 port;
};
enum mlx4_ib_mmap_cmd {
MLX4_IB_MMAP_UAR_PAGE = 0,
MLX4_IB_MMAP_BLUE_FLAME_PAGE = 1,
MLX4_IB_MMAP_GET_CONTIGUOUS_PAGES = 2,
MLX4_IB_MMAP_GET_HW_CLOCK = 3,
};
enum mlx4_ib_qp_type {
/*
* IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
@ -287,17 +295,6 @@ struct mlx4_roce_smac_vlan_info {
int update_vid;
};
struct mlx4_ib_qpg_data {
unsigned long *tss_bitmap;
unsigned long *rss_bitmap;
struct mlx4_ib_qp *qpg_parent;
int tss_qpn_base;
int rss_qpn_base;
u32 tss_child_count;
u32 rss_child_count;
u32 qpg_tss_mask_sz;
};
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
@ -327,22 +324,16 @@ struct mlx4_ib_qp {
u8 sq_no_prefetch;
u8 state;
int mlx_type;
enum ib_qpg_type qpg_type;
struct mlx4_ib_qpg_data *qpg_data;
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
struct mlx4_roce_smac_vlan_info pri;
struct mlx4_roce_smac_vlan_info alt;
struct list_head rules_list;
u64 reg_id;
int max_inline_data;
struct mlx4_bf bf;
/*
* Experimental data
*/
int max_inlr_data;
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
struct counter_index *counter_index;
};
struct mlx4_ib_srq {
@ -376,23 +367,12 @@ struct mlx4_ib_ah {
#define MLX4_NOT_SET_GUID (0x00LL)
#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
/****************************************/
/* ioctl codes */
/****************************************/
#define MLX4_IOC_MAGIC 'm'
#define MLX4_IOCHWCLOCKOFFSET _IOR(MLX4_IOC_MAGIC, 1, int)
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
MLX4_GUID_INFO_STATUS_PENDING,
};
enum mlx4_guid_alias_rec_ownership {
MLX4_GUID_DRIVER_ASSIGN,
MLX4_GUID_SYSADMIN_ASSIGN,
MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
};
#define GUID_STATE_NEED_PORT_INIT 0x01
enum mlx4_guid_alias_rec_method {
MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
@ -403,8 +383,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
u8 method; /*set or delete*/
enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
u64 time_to_run;
};
struct mlx4_sriov_alias_guid_port_rec_det {
@ -412,6 +392,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
u8 port;
u32 state_flags;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
};
@ -461,7 +442,6 @@ struct mlx4_ib_demux_pv_ctx {
struct ib_device *ib_dev;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_mr *mr;
struct work_struct work;
struct workqueue_struct *wq;
struct mlx4_ib_demux_pv_qp qp[2];
@ -473,7 +453,7 @@ struct mlx4_ib_demux_ctx {
struct workqueue_struct *wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
__be64 subnet_prefix;
atomic64_t subnet_prefix;
__be64 guid_cache[128];
struct mlx4_ib_dev *dev;
/* the following lock protects both mcg_table and mcg_mgid0_list */
@ -503,13 +483,27 @@ struct mlx4_ib_sriov {
struct idr pv_id_table;
};
/*
 * Per-GID bookkeeping referenced from struct gid_entry.
 * NOTE(review): real_index is presumably the value reported by
 * mlx4_ib_gid_index_to_real_index(); refcount presumably counts users of
 * the entry - confirm both in the GID management code.
 */
struct gid_cache_context {
int real_index;
int refcount;
};
/*
 * One slot of a per-port GID table (see struct mlx4_port_gid_table): the GID
 * value, its type, and a pointer to the associated gid_cache_context.
 */
struct gid_entry {
union ib_gid gid;
enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
};
/*
 * GID table of a single port: a fixed array of MLX4_MAX_PORT_GIDS entries.
 * struct mlx4_ib_iboe holds one such table per port (MLX4_MAX_PORTS).
 */
struct mlx4_port_gid_table {
struct gid_entry gids[MLX4_MAX_PORT_GIDS];
};
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
struct net_device *masters[MLX4_MAX_PORTS];
struct notifier_block nb;
struct notifier_block nb_inet;
union ib_gid gid_table[MLX4_MAX_PORTS][128];
atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
};
struct pkey_mgt {
@ -548,15 +542,32 @@ struct mlx4_ib_iov_port {
struct mlx4_ib_iov_sysfs_attr mcg_dentry;
};
struct mlx4_ib_counter {
int counter_index;
int status;
/*
 * List node describing one counter index.
 * NOTE(review): presumably linked on mlx4_ib_counters.counters_list, with
 * 'allocated' telling whether the driver allocated (and must later free)
 * the index - confirm in the counter setup/teardown code.
 */
struct counter_index {
struct list_head list;
u32 index;
u8 allocated;
};
/*
 * Per-port counter bookkeeping; struct mlx4_ib_dev keeps one instance per
 * port in counters_table[MLX4_MAX_PORTS].
 */
struct mlx4_ib_counters {
struct list_head counters_list;
struct mutex mutex; /* mutex for accessing counters list */
/* NOTE(review): presumably the counter index used when none is assigned - confirm */
u32 default_counter;
};
#define MLX4_DIAG_COUNTERS_TYPES 2
/*
 * Description of one set of diagnostic counters; struct mlx4_ib_dev keeps
 * MLX4_DIAG_COUNTERS_TYPES of these.
 * NOTE(review): 'name' and 'offset' are presumably parallel arrays of
 * num_counters elements - confirm where they are allocated.
 */
struct mlx4_ib_diag_counters {
const char **name;
u32 *offset;
u32 num_counters;
};
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
int num_ports;
void __iomem *uar_map;
struct mlx4_uar priv_uar;
u32 priv_pdn;
MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
@ -564,14 +575,14 @@ struct mlx4_ib_dev {
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
spinlock_t sm_lock;
atomic64_t sl2vl[MLX4_MAX_PORTS];
struct mlx4_ib_sriov sriov;
struct mutex cap_mask_mutex;
bool ib_active;
struct mlx4_ib_iboe iboe;
struct mlx4_ib_counter counters[MLX4_MAX_PORTS];
struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS];
int *eq_table;
int eq_added;
struct kobject *iov_parent;
struct kobject *ports_parent;
struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
@ -580,12 +591,22 @@ struct mlx4_ib_dev {
unsigned long *ib_uc_qpns_bitmap;
int steer_qpn_count;
int steer_qpn_base;
int steering_support;
struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS];
/* lock when destroying qp1_proxy and getting netdev events */
struct mutex qp1_proxy_lock[MLX4_MAX_PORTS];
u8 bond_next_port;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
};
/*
 * Work item that bundles a work_struct with the device, a copy of an event
 * queue entry (struct mlx4_eqe) and a port number.
 * NOTE(review): presumably queued to process the EQE outside the event
 * handler - confirm in the event code.
 */
struct ib_event_work {
struct work_struct work;
struct mlx4_ib_dev *ib_dev;
struct mlx4_eqe ib_eqe;
int port;
};
struct mlx4_ib_qp_tunnel_init_attr {
@ -595,6 +616,21 @@ struct mlx4_ib_qp_tunnel_init_attr {
u8 port;
};
/*
 * Two 32-bit fields, comp_mask and reserved.
 * NOTE(review): presumably the mlx4-specific input of the extended
 * query-device verb - confirm against the user-space ABI.
 */
struct mlx4_uverbs_ex_query_device {
__u32 comp_mask;
__u32 reserved;
};
/*
 * Bit flags; only bit 0 is defined.
 * NOTE(review): presumably stored in the comp_mask of
 * struct mlx4_uverbs_ex_query_device_resp to mark the timestamp/clock
 * information as valid - confirm.
 */
enum query_device_resp_mask {
QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0,
};
/*
 * Two 32-bit fields (comp_mask, response_length) followed by a 64-bit
 * hca_core_clock_offset.
 * NOTE(review): presumably the mlx4-specific response of the extended
 * query-device verb, with response_length giving the number of bytes
 * filled in - confirm against the user-space ABI.
 */
struct mlx4_uverbs_ex_query_device_resp {
__u32 comp_mask;
__u32 response_length;
__u64 hca_core_clock_offset;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@ -635,11 +671,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx4_ib_mw, ibmw);
}
static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
{
return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
}
static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
{
return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@ -675,6 +706,13 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
/*
 * Advance the device's bonding cursor by one port, wrapping at
 * dev->num_ports, and return the newly selected port.
 *
 * dev->bond_next_port is stored zero-based (result of the modulo); the value
 * returned is that cursor plus one.  The function takes no lock itself and
 * divides by dev->num_ports, so the caller must ensure num_ports is non-zero.
 */
static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
{
	u8 cursor = (dev->bond_next_port + 1) % dev->num_ports;

	dev->bond_next_port = cursor;
	return cursor + 1;
}
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
@ -685,30 +723,22 @@ void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db)
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
u64 start_va,
int *num_of_mtts);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata, int mr_id);
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len);
struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
int page_list_len);
void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
int mlx4_ib_modify_cq(struct ib_cq *cq,
struct ib_cq_attr *cq_attr,
int cq_attr_mask);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg);
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq);
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
struct ib_cq_init_attr *attr,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata);
int mlx4_ib_destroy_cq(struct ib_cq *cq);
@ -746,11 +776,13 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
struct ib_wc *in_wc, struct ib_grh *in_grh,
struct ib_mad *in_mad, struct ib_mad *out_mad);
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad_hdr *in, size_t in_mad_size,
struct ib_mad_hdr *out, size_t *out_mad_size,
u16 *out_mad_pkey_index);
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
@ -768,28 +800,15 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view);
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port);
int mlx4_ib_query_if_stat(struct mlx4_ib_dev *dev, u32 counter_index,
union mlx4_counter *counter, u8 clear);
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET)
return 1;
return true;
return !!(ah->av.ib.g_slid & 0x80);
}
static inline int mlx4_ib_qp_has_rq(struct ib_qp_init_attr *attr)
{
if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
return 0;
return !attr->srq;
}
int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
@ -815,13 +834,16 @@ void mlx4_ib_tunnels_update_work(struct work_struct *work);
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, u8 *s_mac, struct ib_mad *mad);
u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
u16 vlan_id, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad, int is_eth);
struct ib_mad *mad);
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad);
@ -848,6 +870,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
int port, int slave_init);
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
@ -859,7 +883,16 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props);
int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
u64 start, u64 length, u64 virt_addr,
int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
u8 port_num, int index);
void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
int port);
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
#endif /* MLX4_IB_H */

View File

@ -38,28 +38,10 @@
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/etherdevice.h>
#include "mlx4_ib.h"
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port)
{
struct in6_addr in6;
*is_mcast = 0;
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
if (rdma_link_local_addr(&in6))
rdma_get_ll_mac(&in6, mac);
else if (rdma_is_multicast_addr(&in6)) {
rdma_get_mcast_mac(&in6, mac);
*is_mcast = 1;
} else
return -EINVAL;
return 0;
}
static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
struct mlx4_ib_ah *ah)
{
@ -67,6 +49,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.ib.g_slid = ah_attr->src_path_bits;
ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
if (ah_attr->ah_flags & IB_AH_GRH) {
ah->av.ib.g_slid |= 0x80;
ah->av.ib.gid_index = ah_attr->grh.sgid_index;
@ -84,7 +67,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
!(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
--ah->av.ib.stat_rate;
}
ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
return &ah->ibah;
}
@ -96,21 +78,38 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
struct mlx4_dev *dev = ibdev->dev;
int is_mcast = 0;
struct in6_addr in6;
u16 vlan_tag;
u16 vlan_tag = 0xffff;
union ib_gid sgid;
struct ib_gid_attr gid_attr;
int ret;
memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6)) {
is_mcast = 1;
resolve_mcast_mac(&in6, ah->av.eth.mac);
rdma_get_mcast_mac(&in6, ah->av.eth.mac);
} else {
memcpy(ah->av.eth.mac, ah_attr->dmac, 6);
memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
}
ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
ah_attr->grh.sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
eth_zero_addr(ah->av.eth.s_mac);
if (gid_attr.ndev) {
if (is_vlan_dev(gid_attr.ndev))
vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
memcpy(ah->av.eth.s_mac, IF_LLADDR(gid_attr.ndev), ETH_ALEN);
dev_put(gid_attr.ndev);
}
vlan_tag = ah_attr->vlan_id;
if (vlan_tag < 0x1000)
vlan_tag |= (ah_attr->sl & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = ah_attr->grh.sgid_index;
ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
if (ret < 0)
return ERR_PTR(ret);
ah->av.eth.gid_index = ret;
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
@ -168,9 +167,13 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
enum rdma_link_layer ll;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
if (ll == IB_LINK_LAYER_ETHERNET)
ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
else
ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
if (ah->av.ib.stat_rate)
ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;

View File

@ -42,6 +42,8 @@
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
#include <linux/delay.h>
#include <linux/math64.h>
#include <linux/ktime.h>
#include "mlx4_ib.h"
/*
@ -57,15 +59,19 @@ struct mlx4_alias_guid_work_context {
int query_id;
struct list_head list;
int block_num;
ib_sa_comp_mask guid_indexes;
u8 method;
};
struct mlx4_next_alias_guid_work {
u8 port;
u8 block_num;
u8 method;
struct mlx4_sriov_alias_guid_info_rec_det rec_det;
};
static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
int *resched_delay_sec);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
u8 port_num, u8 *p_data)
@ -119,6 +125,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
return IB_SA_COMP_MASK(4 + index);
}
void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
int port, int slave_init)
{
__be64 curr_guid, required_guid;
int record_num = slave / 8;
int index = slave % 8;
int port_index = port - 1;
unsigned long flags;
int do_work = 0;
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
GUID_STATE_NEED_PORT_INIT)
goto unlock;
if (!slave_init) {
curr_guid = *(__be64 *)&dev->sriov.
alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].
all_recs[GUID_REC_SIZE * index];
if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
!curr_guid)
goto unlock;
required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
} else {
required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
goto unlock;
}
*(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].
all_recs[GUID_REC_SIZE * index] = required_guid;
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].guid_indexes
|= mlx4_ib_get_aguid_comp_mask_from_ix(index);
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE;
/* set to run immediately */
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].time_to_run = 0;
dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[record_num].
guids_retry_schedule[index] = 0;
do_work = 1;
unlock:
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
if (do_work)
mlx4_ib_init_alias_guid_work(dev, port_index);
}
/*
* Whenever new GUID is set/unset (guid table change) create event and
* notify the relevant slave (master also should be notified).
@ -134,15 +191,20 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
{
int i;
u64 guid_indexes;
int slave_id;
int slave_id, slave_port;
enum slave_port_state new_state;
enum slave_port_state prev_state;
__be64 tmp_cur_ag, form_cache_ag;
enum slave_port_gen_event gen_event;
struct mlx4_sriov_alias_guid_info_rec_det *rec;
unsigned long flags;
__be64 required_value;
if (!mlx4_is_master(dev->dev))
return;
rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
all_rec_per_port[block_num];
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
@ -156,8 +218,13 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
if (slave_id >= dev->dev->num_slaves)
if (slave_id >= dev->dev->persist->num_vfs + 1)
return;
slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num);
if (slave_port < 0) /* this port isn't available for the VF */
continue;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
(NUM_ALIAS_GUID_IN_REC * block_num) + i);
@ -168,8 +235,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
*/
if (tmp_cur_ag != form_cache_ag)
continue;
mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
required_value = 0;
if (tmp_cur_ag == required_value) {
rec->guid_indexes = rec->guid_indexes &
~mlx4_ib_get_aguid_comp_mask_from_ix(i);
} else {
/* may notify port down if value is 0 */
if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
spin_unlock_irqrestore(&dev->sriov.
alias_guid.ag_work_lock, flags);
continue;
}
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
flags);
mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
/*2 cases: Valid GUID, and Invalid Guid*/
if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@ -190,10 +276,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
&gen_event);
pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
MLX4_PORT_CHANGE_SUBTYPE_DOWN);
if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev,
slave_id,
port_num,
MLX4_PORT_CHANGE_SUBTYPE_DOWN);
}
}
}
}
@ -208,6 +298,9 @@ static void aliasguid_query_handler(int status,
int i;
struct mlx4_sriov_alias_guid_info_rec_det *rec;
unsigned long flags, flags1;
ib_sa_comp_mask declined_guid_indexes = 0;
ib_sa_comp_mask applied_guid_indexes = 0;
unsigned int resched_delay_sec = 0;
if (!context)
return;
@ -218,9 +311,9 @@ static void aliasguid_query_handler(int status,
all_rec_per_port[cb_ctx->block_num];
if (status) {
rec->status = MLX4_GUID_INFO_STATUS_IDLE;
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
rec->time_to_run = ktime_get_ns() + 1 * NSEC_PER_SEC;
goto out;
}
@ -237,69 +330,101 @@ static void aliasguid_query_handler(int status,
rec = &dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[guid_rec->block_num];
rec->status = MLX4_GUID_INFO_STATUS_SET;
rec->method = MLX4_GUID_INFO_RECORD_SET;
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
__be64 tmp_cur_ag;
tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
if ((cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE)
&& (MLX4_NOT_SET_GUID == tmp_cur_ag)) {
pr_debug("%s:Record num %d in block_num:%d "
"was deleted by SM,ownership by %d "
"(0 = driver, 1=sysAdmin, 2=None)\n",
__func__, i, guid_rec->block_num,
rec->ownership);
rec->guid_indexes = rec->guid_indexes &
~mlx4_ib_get_aguid_comp_mask_from_ix(i);
__be64 sm_response, required_val;
if (!(cb_ctx->guid_indexes &
mlx4_ib_get_aguid_comp_mask_from_ix(i)))
continue;
sm_response = *(__be64 *)&guid_rec->guid_info_list
[i * GUID_REC_SIZE];
required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
if (required_val ==
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
goto next_entry;
/* A new value was set till we got the response */
pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
(long long)be64_to_cpu(required_val),
i, guid_rec->block_num);
goto entry_declined;
}
/* check if the SM didn't assign one of the records.
* if it didn't, if it was not sysadmin request:
* ask the SM to give a new GUID, (instead of the driver request).
* if it didn't, re-ask for.
*/
if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
"block_num: %d was declined by SM, "
"ownership by %d (0 = driver, 1=sysAdmin,"
" 2=None)\n", __func__, i,
guid_rec->block_num, rec->ownership);
if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
/* if it is driver assign, asks for new GUID from SM*/
*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
MLX4_NOT_SET_GUID;
/* Mark the record as not assigned, and let it
* be sent again in the next work sched.*/
rec->status = MLX4_GUID_INFO_STATUS_IDLE;
rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
if (sm_response == MLX4_NOT_SET_GUID) {
if (rec->guids_retry_schedule[i] == 0)
mlx4_ib_warn(&dev->ib_dev,
"%s:Record num %d in block_num: %d was declined by SM\n",
__func__, i,
guid_rec->block_num);
goto entry_declined;
} else {
/* properly assigned record. */
/* We save the GUID we just got from the SM in the
* admin_guid in order to be persistent, and in the
* request from the sm the process will ask for the same GUID */
if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
/* the sysadmin assignment failed.*/
mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
" admin guid after SysAdmin "
"configuration. "
"Record num %d in block_num:%d "
"was declined by SM, "
"new val(0x%llx) was kept\n",
__func__, i,
guid_rec->block_num,
(long long)be64_to_cpu(*(__be64 *) &
rec->all_recs[i * GUID_REC_SIZE]));
if (required_val &&
sm_response != required_val) {
/* Warn only on first retry */
if (rec->guids_retry_schedule[i] == 0)
mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
" admin guid after SysAdmin "
"configuration. "
"Record num %d in block_num:%d "
"was declined by SM, "
"new val(0x%llx) was kept, SM returned (0x%llx)\n",
__func__, i,
guid_rec->block_num,
(long long)be64_to_cpu(required_val),
(long long)be64_to_cpu(sm_response));
goto entry_declined;
} else {
memcpy(&rec->all_recs[i * GUID_REC_SIZE],
&guid_rec->guid_info_list[i * GUID_REC_SIZE],
GUID_REC_SIZE);
*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
sm_response;
if (required_val == 0)
mlx4_set_admin_guid(dev->dev,
sm_response,
(guid_rec->block_num
* NUM_ALIAS_GUID_IN_REC) + i,
cb_ctx->port);
goto next_entry;
}
}
entry_declined:
declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
rec->guids_retry_schedule[i] =
(rec->guids_retry_schedule[i] == 0) ? 1 :
min((unsigned int)60,
rec->guids_retry_schedule[i] * 2);
/* using the minimum value among all entries in that record */
resched_delay_sec = (resched_delay_sec == 0) ?
rec->guids_retry_schedule[i] :
min(resched_delay_sec,
rec->guids_retry_schedule[i]);
continue;
next_entry:
rec->guids_retry_schedule[i] = 0;
}
applied_guid_indexes = cb_ctx->guid_indexes & ~declined_guid_indexes;
if (declined_guid_indexes ||
rec->guid_indexes & ~(applied_guid_indexes)) {
pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
guid_rec->block_num,
(long long)be64_to_cpu((__force __be64)rec->guid_indexes),
(long long)be64_to_cpu((__force __be64)applied_guid_indexes),
(long long)be64_to_cpu((__force __be64)declined_guid_indexes));
rec->time_to_run = ktime_get_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
/*
The func is call here to close the cases when the
sm doesn't send smp, so in the sa response the driver
@ -311,10 +436,13 @@ static void aliasguid_query_handler(int status,
out:
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down)
if (!dev->sriov.is_going_down) {
get_low_record_time_index(dev, port_index, &resched_delay_sec);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
&dev->sriov.alias_guid.ports_guid[port_index].
alias_guid_work, 0);
alias_guid_work,
msecs_to_jiffies(resched_delay_sec * 1000));
}
if (cb_ctx->sa_query) {
list_del(&cb_ctx->list);
kfree(cb_ctx);
@ -331,9 +459,7 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
ib_sa_comp_mask comp_mask = 0;
dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
= MLX4_GUID_INFO_STATUS_IDLE;
dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
= MLX4_GUID_INFO_RECORD_SET;
= MLX4_GUID_INFO_STATUS_SET;
/* calculate the comp_mask for that record.*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@ -347,19 +473,21 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
need to assign GUIDs, then don't put it up for assignment.
*/
if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
(!index && !i) ||
MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
ports_guid[port - 1].all_rec_per_port[index].ownership)
(!index && !i))
continue;
comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].guid_indexes = comp_mask;
all_rec_per_port[index].guid_indexes |= comp_mask;
if (dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].guid_indexes)
dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
}
static int set_guid_rec(struct ib_device *ibdev,
u8 port, int index,
struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
struct mlx4_next_alias_guid_work *rec)
{
int err;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
@ -368,6 +496,9 @@ static int set_guid_rec(struct ib_device *ibdev,
struct ib_port_attr attr;
struct mlx4_alias_guid_work_context *callback_context;
unsigned long resched_delay, flags, flags1;
u8 port = rec->port + 1;
int index = rec->block_num;
struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
@ -394,7 +525,9 @@ static int set_guid_rec(struct ib_device *ibdev,
callback_context->port = port;
callback_context->dev = dev;
callback_context->block_num = index;
callback_context->method = rec_det->method;
callback_context->guid_indexes = rec_det->guid_indexes;
callback_context->method = rec->method;
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
guid_info_rec.lid = cpu_to_be16(attr.lid);
@ -413,7 +546,7 @@ static int set_guid_rec(struct ib_device *ibdev,
callback_context->query_id =
ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
ibdev, port, &guid_info_rec,
comp_mask, rec_det->method, 1000,
comp_mask, rec->method, 1000,
GFP_KERNEL, aliasguid_query_handler,
callback_context,
&callback_context->sa_query);
@ -448,6 +581,30 @@ static int set_guid_rec(struct ib_device *ibdev,
return err;
}
static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
{
int j, k, entry;
__be64 guid;
/*Check if the SM doesn't need to assign the GUIDs*/
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
entry = j * NUM_ALIAS_GUID_IN_REC + k;
/* no request for the 0 entry (hw guid) */
if (!entry || entry > dev->dev->persist->num_vfs ||
!mlx4_is_slave_active(dev->dev, entry))
continue;
guid = mlx4_get_admin_guid(dev->dev, entry, port);
*(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[j].all_recs
[GUID_REC_SIZE * k] = guid;
pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
entry,
(long long)be64_to_cpu(guid),
port);
}
}
}
void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
{
int i;
@ -457,6 +614,13 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
GUID_STATE_NEED_PORT_INIT) {
mlx4_ib_guid_port_init(dev, port);
dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
(~GUID_STATE_NEED_PORT_INIT);
}
for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
invalidate_guid_record(dev, port, i);
@ -476,60 +640,107 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
/*
 * Prepare @next_rec as the next request for record @record_index of port
 * index @port.
 *
 * Every GUID slot selected in the record's guid_indexes mask is sorted into
 * either a "delete" mask (the slot holds MLX4_GUID_FOR_DELETE_VAL) or a
 * "set" mask (any other value).  The slot with the lowest
 * guids_retry_schedule value is remembered (the last one wins on ties).
 * The stored record is then copied into @next_rec, and the request method
 * and guid_indexes are narrowed to the kind of operation that slot needs.
 *
 * NOTE(review): if no slot is selected in guid_indexes the remembered slot
 * stays -1 and the read after the loop indexes before all_recs.  The
 * visible caller only passes records picked by get_low_record_time_index(),
 * which requires a non-zero guid_indexes - confirm for any new caller.
 */
static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
				struct mlx4_next_alias_guid_work *next_rec,
				int record_index)
{
	struct mlx4_sriov_alias_guid_info_rec_det *rec =
		&dev->sriov.alias_guid.ports_guid[port].
		all_rec_per_port[record_index];
	const __be64 delete_val = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
	ib_sa_comp_mask to_delete = 0;
	ib_sa_comp_mask to_set = 0;
	int chosen_slot = -1;
	int chosen_time = 0;
	int slot;

	for (slot = 0; slot < NUM_ALIAS_GUID_IN_REC; slot++) {
		ib_sa_comp_mask bit = mlx4_ib_get_aguid_comp_mask_from_ix(slot);

		/* only slots still awaiting an update are of interest */
		if (!(rec->guid_indexes & bit))
			continue;

		if (*(__be64 *)&rec->all_recs[slot * GUID_REC_SIZE] == delete_val)
			to_delete |= bit;
		else
			to_set |= bit;

		if (chosen_slot == -1 ||
		    rec->guids_retry_schedule[slot] <= chosen_time) {
			chosen_slot = slot;
			chosen_time = rec->guids_retry_schedule[slot];
		}
	}

	memcpy(&next_rec->rec_det, rec, sizeof(*rec));
	next_rec->port = port;
	next_rec->block_num = record_index;

	/* the chosen slot decides whether this request deletes or sets */
	if (*(__be64 *)&rec->all_recs[chosen_slot * GUID_REC_SIZE] == delete_val) {
		next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
		next_rec->rec_det.guid_indexes = to_delete;
	} else {
		next_rec->method = MLX4_GUID_INFO_RECORD_SET;
		next_rec->rec_det.guid_indexes = to_set;
	}
}
/*
 * Return the index of the record of port index @port that should be
 * updated next, based on the lowest rescheduled time.
 *
 * Only records in MLX4_GUID_INFO_STATUS_IDLE state with a non-zero
 * guid_indexes mask are candidates; among them the one with the smallest
 * time_to_run is chosen (the first one on ties).
 *
 * @resched_delay_sec: optional.  When non-NULL it receives the number of
 *	whole seconds between now (ktime_get_ns()) and the chosen record's
 *	time_to_run, or 0 when that time has already passed or no record
 *	qualifies.
 *
 * Returns the chosen record index, or -1 when no record qualifies.
 *
 * The callers visible in this file invoke it with alias_guid.ag_work_lock
 * held.
 */
static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
				     int *resched_delay_sec)
{
	const struct mlx4_sriov_alias_guid_info_rec_det *rec;
	u64 low_record_time = 0;
	int record_index = -1;
	int j;

	for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
		/* look at the record in place instead of copying the struct */
		rec = &dev->sriov.alias_guid.ports_guid[port].
			all_rec_per_port[j];

		if (rec->status != MLX4_GUID_INFO_STATUS_IDLE ||
		    !rec->guid_indexes)
			continue;

		if (record_index == -1 ||
		    rec->time_to_run < low_record_time) {
			record_index = j;
			low_record_time = rec->time_to_run;
		}
	}

	if (resched_delay_sec) {
		u64 curr_time = ktime_get_ns();

		*resched_delay_sec = (low_record_time < curr_time) ? 0 :
			div_u64((low_record_time - curr_time), NSEC_PER_SEC);
	}

	return record_index;
}
/* The function returns the next record that was
* not configured (or failed to be configured) */
static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
struct mlx4_next_alias_guid_work *rec)
{
int j;
unsigned long flags;
int record_index;
int ret = 0;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
MLX4_GUID_INFO_STATUS_IDLE) {
memcpy(&rec->rec_det,
&dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
rec->port = port;
rec->block_num = j;
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
MLX4_GUID_INFO_STATUS_PENDING;
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
return 0;
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
record_index = get_low_record_time_index(dev, port, NULL);
if (record_index < 0) {
ret = -ENOENT;
goto out;
}
return -ENOENT;
}
/*
 * Overwrite record @rec_index of port index @port with the guid_indexes
 * mask, the GUID payload (all_recs) and the status taken from @rec_det.
 */
static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
					     int rec_index,
					     struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
{
	struct mlx4_sriov_alias_guid_info_rec_det *dst =
		&dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index];

	dst->guid_indexes = rec_det->guid_indexes;
	memcpy(dst->all_recs, rec_det->all_recs,
	       NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
	dst->status = rec_det->status;
}
static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
{
int j;
struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
IB_SA_GUIDINFO_REC_GID7;
rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
set_administratively_guid_record(dev, port, j, &rec_det);
}
set_required_record(dev, port, rec, record_index);
out:
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
return ret;
}
static void alias_guid_work(struct work_struct *work)
@ -559,9 +770,7 @@ static void alias_guid_work(struct work_struct *work)
goto out;
}
set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
&rec->rec_det);
set_guid_rec(&dev->ib_dev, rec);
out:
kfree(rec);
}
@ -576,6 +785,12 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down) {
/* If a work item is already pending, cancel it before queueing
* again; otherwise the new request won't run until the previous
* one has ended, because the same work struct is used.
*/
cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
alias_guid_work);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
&dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
}
@ -623,7 +838,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
{
char alias_wq_name[15];
int ret = 0;
int i, j, k;
int i, j;
union ib_gid gid;
if (!mlx4_is_master(dev->dev))
@ -647,37 +862,29 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
for (i = 0 ; i < dev->num_ports; i++) {
memset(&dev->sriov.alias_guid.ports_guid[i], 0,
sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
/*Check if the SM doesn't need to assign the GUIDs*/
dev->sriov.alias_guid.ports_guid[i].state_flags |=
GUID_STATE_NEED_PORT_INIT;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
if (mlx4_ib_sm_guid_assign) {
dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].
ownership = MLX4_GUID_DRIVER_ASSIGN;
continue;
}
dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
ownership = MLX4_GUID_NONE_ASSIGN;
/*mark each val as it was deleted,
till the sysAdmin will give it valid val*/
for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
*(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
}
/* mark each val as it was deleted */
memset(dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs, 0xFF,
sizeof(dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs));
}
INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
/*prepare the records, set them to be allocated by sm*/
if (mlx4_ib_sm_guid_assign)
for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
invalidate_guid_record(dev, i + 1, j);
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i;
if (mlx4_ib_sm_guid_assign)
set_all_slaves_guids(dev, i);
snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq =
create_singlethread_workqueue(alias_wq_name);
alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM);
if (!dev->sriov.alias_guid.ports_guid[i].wq) {
ret = -ENOMEM;
goto err_thread;

View File

@ -242,8 +242,7 @@ static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
static struct id_map_entry *
id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
{
int ret, id;
static int next_id;
int ret;
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
@ -259,25 +258,22 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
ent->dev = to_mdev(ibdev);
INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
do {
spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
ret = idr_get_new_above(&sriov->pv_id_table, ent,
next_id, &id);
if (!ret) {
next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
ent->pv_cm_id = (u32)id;
sl_id_map_add(ibdev, ent);
}
idr_preload(GFP_KERNEL);
spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
spin_unlock(&sriov->id_map_lock);
} while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
/*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
if (!ret) {
spin_lock(&sriov->id_map_lock);
ret = idr_alloc_cyclic(&sriov->pv_id_table, ent, 0, 0, GFP_NOWAIT);
if (ret >= 0) {
ent->pv_cm_id = (u32)ret;
sl_id_map_add(ibdev, ent);
list_add_tail(&ent->list, &sriov->cm_list);
spin_unlock(&sriov->id_map_lock);
return ent;
}
spin_unlock(&sriov->id_map_lock);
idr_preload_end();
if (ret >= 0)
return ent;
/*error flow*/
kfree(ent);
mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
@ -327,8 +323,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REP_ATTR_ID) {
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
sl_cm_id = get_local_comm_id(mad);
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
@ -361,7 +356,7 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad, int is_eth)
struct ib_mad *mad)
{
u32 pv_cm_id;
struct id_map_entry *id;
@ -370,7 +365,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
union ib_gid gid;
if (is_eth)
if (!slave)
return 0;
gid = gid_from_req_msg(ibdev, mad);
@ -391,7 +386,7 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
return -ENOENT;
}
if (!is_eth)
if (slave)
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
@ -411,7 +406,6 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
INIT_LIST_HEAD(&dev->sriov.cm_list);
dev->sriov.sl_id_map = RB_ROOT;
idr_init(&dev->sriov.pv_id_table);
idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
}
/* slave = -1 ==> all slaves */

View File

@ -34,14 +34,11 @@
#include <dev/mlx4/cq.h>
#include <dev/mlx4/qp.h>
#include <dev/mlx4/srq.h>
#include <dev/mlx4/driver.h>
#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
/* Which firmware version adds support for Resize CQ */
#define MLX4_FW_VER_RESIZE_CQ mlx4_fw_ver(2, 5, 0)
#define MLX4_FW_VER_IGNORE_OVERRUN_CQ mlx4_fw_ver(2, 7, 8200)
#include <rdma/mlx4-abi.h>
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
@ -93,33 +90,12 @@ static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
return get_sw_cqe(cq, cq->mcq.cons_index);
}
int mlx4_ib_modify_cq(struct ib_cq *cq,
struct ib_cq_attr *cq_attr,
int cq_attr_mask)
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
int err = 0;
struct mlx4_ib_cq *mcq = to_mcq(cq);
struct mlx4_ib_dev *dev = to_mdev(cq->device);
if (cq_attr_mask & IB_CQ_CAP_FLAGS) {
if (cq_attr->cq_cap_flags & IB_CQ_TIMESTAMP)
return -ENOTSUPP;
if (cq_attr->cq_cap_flags & IB_CQ_IGNORE_OVERRUN) {
if (dev->dev->caps.cq_flags & MLX4_DEV_CAP_CQ_FLAG_IO)
err = mlx4_cq_ignore_overrun(dev->dev, &mcq->mcq);
else
err = -ENOSYS;
}
}
if (!err)
if (cq_attr_mask & IB_CQ_MODERATION)
err = mlx4_cq_modify(dev->dev, &mcq->mcq,
cq_attr->moderation.cq_count,
cq_attr->moderation.cq_period);
return err;
return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period);
}
static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent)
@ -127,7 +103,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
int err;
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
PAGE_SIZE * 2, &buf->buf);
PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
if (err)
goto out;
@ -138,7 +114,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
if (err)
goto err_buf;
err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
if (err)
goto err_mtt;
@ -165,18 +141,14 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
{
int err;
int cqe_size = dev->dev->caps.cqe_size;
int shift;
int n;
*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
IB_ACCESS_LOCAL_WRITE, 1);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
n = ib_umem_page_count(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
ilog2((*umem)->page_size), &buf->mtt);
if (err)
goto err_buf;
@ -195,20 +167,18 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
return err;
}
/* we don't support system timestamping */
#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_TIMESTAMP
#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION
struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
struct ib_cq_init_attr *attr,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
struct ib_udata *udata)
{
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_cq *cq;
struct mlx4_uar *uar;
int err;
int entries = attr->cqe;
int vector = attr->comp_vector;
if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
@ -216,7 +186,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
return ERR_PTR(-EINVAL);
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
cq = kmalloc(sizeof *cq, GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@ -227,6 +197,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
cq->resize_buf = NULL;
cq->resize_umem = NULL;
cq->create_flags = attr->flags;
INIT_LIST_HEAD(&cq->send_qp_list);
INIT_LIST_HEAD(&cq->recv_qp_list);
if (context) {
struct mlx4_ib_create_cq ucmd;
@ -248,7 +220,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
uar = &to_mucontext(context)->uar;
} else {
err = mlx4_db_alloc(dev->dev, &cq->db, 1);
err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
if (err)
goto err_cq;
@ -269,21 +241,24 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
cq->db.dma, &cq->mcq, vector, 0,
!!(cq->create_flags & IB_CQ_TIMESTAMP));
!!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
if (err)
goto err_dbmap;
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;
if (context)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
err = -EFAULT;
goto err_dbmap;
goto err_cq_free;
}
return &cq->ibcq;
err_cq_free:
mlx4_cq_free(dev->dev, &cq->mcq);
err_dbmap:
if (context)
mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
@ -314,7 +289,7 @@ static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
if (cq->resize_buf)
return -EBUSY;
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
@ -342,7 +317,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return -EFAULT;
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_ATOMIC);
cq->resize_buf = kmalloc(sizeof *cq->resize_buf, GFP_KERNEL);
if (!cq->resize_buf)
return -ENOMEM;
@ -370,17 +345,15 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq)
return i - cq->mcq.cons_index;
}
static int mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
{
struct mlx4_cqe *cqe, *new_cqe;
int i;
int cqe_size = cq->buf.entry_size;
int cqe_inc = cqe_size == 64 ? 1 : 0;
struct mlx4_cqe *start_cqe;
i = cq->mcq.cons_index;
cqe = get_cqe(cq, i & cq->ibcq.cqe);
start_cqe = cqe;
cqe += cqe_inc;
while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
@ -392,15 +365,9 @@ static int mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
if (cqe == start_cqe) {
pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n", cq->mcq.cqn);
return -ENOMEM;
}
cqe += cqe_inc;
}
++cq->mcq.cons_index;
return 0;
}
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
@ -411,9 +378,6 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
int outst_cqe;
int err;
if (dev->dev->caps.fw_ver < MLX4_FW_VER_RESIZE_CQ)
return -ENOSYS;
mutex_lock(&cq->resize_mutex);
if (entries < 1 || entries > dev->dev->caps.max_cqes) {
err = -EINVAL;
@ -439,7 +403,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
/* Can't be smaller than the number of outstanding CQEs */
outst_cqe = mlx4_ib_get_outstanding_cqes(cq);
if (entries < outst_cqe + 1) {
err = 0;
err = -EINVAL;
goto out;
}
@ -470,7 +434,7 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
spin_lock_irq(&cq->lock);
if (cq->resize_buf) {
err = mlx4_ib_cq_resize_copy_cqes(cq);
mlx4_ib_cq_resize_copy_cqes(cq);
tmp_buf = cq->buf;
tmp_cqe = cq->ibcq.cqe;
cq->buf = cq->resize_buf->buf;
@ -507,17 +471,6 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
return err;
}
/*
 * Ask the device to ignore overruns on @ibcq.
 *
 * Returns -ENOSYS when the firmware version is older than
 * MLX4_FW_VER_IGNORE_OVERRUN_CQ, otherwise the result of
 * mlx4_cq_ignore_overrun().
 */
int mlx4_ib_ignore_overrun_cq(struct ib_cq *ibcq)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibcq->device);
	struct mlx4_ib_cq *mcq = to_mcq(ibcq);

	if (mdev->dev->caps.fw_ver >= MLX4_FW_VER_IGNORE_OVERRUN_CQ)
		return mlx4_cq_ignore_overrun(mdev->dev, &mcq->mcq);

	return -ENOSYS;
}
int mlx4_ib_destroy_cq(struct ib_cq *cq)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
@ -624,8 +577,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
checksum == cpu_to_be16(0xffff);
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
unsigned tail, struct mlx4_cqe *cqe, int is_eth)
static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
unsigned tail, struct mlx4_cqe *cqe, int is_eth)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
@ -643,12 +596,60 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
} else {
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
}
}
return 0;
/*
 * Produce software flush-error completions for the work requests still
 * outstanding (head - tail) on one work queue of @qp.
 *
 * @num_entries: total capacity of the caller's completion array
 * @wc:          first free completion entry to fill
 * @npolled:     running count of filled entries; incremented per completion
 * @is_send:     non-zero selects the send queue, zero the receive queue
 *
 * Each generated completion carries the stored wr_id, status
 * IB_WC_WR_FLUSH_ERR and vendor error MLX4_CQE_SYNDROME_WR_FLUSH_ERR, and
 * advances the queue tail.  Stops once the queue is drained or *npolled
 * reaches @num_entries.
 */
static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries,
			       struct ib_wc *wc, int *npolled, int is_send)
{
	struct mlx4_ib_wq *wq = is_send ? &qp->sq : &qp->rq;
	unsigned outstanding = wq->head - wq->tail;

	for (; outstanding != 0 && *npolled < num_entries;
	     outstanding--, wc++) {
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX4_CQE_SYNDROME_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;
		wq->tail++;
		(*npolled)++;
	}
}
/*
 * Fill @wc with simulated FLUSH_ERR completions for the uncompleted WQEs
 * of every QP attached to @cq: first the QPs on the CQ's send list, then
 * those on its receive list.
 *
 * *npolled is reset to 0 and ends up holding the number of entries
 * written; the walk stops as soon as it reaches @num_entries.
 */
static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx4_ib_qp *qp;

	*npolled = 0;

	/* send queues of the QPs using this CQ for send completions */
	list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) {
		mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 1);
		if (*npolled >= num_entries)
			return;
	}

	/* receive queues of the QPs using this CQ for receive completions */
	list_for_each_entry(qp, &cq->recv_qp_list, cq_recv_list) {
		mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 0);
		if (*npolled >= num_entries)
			return;
	}
}
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
@ -662,11 +663,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_srq *msrq = NULL;
int is_send;
int is_error;
int is_eth;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
unsigned tail = 0;
int timestamp_en = !!(cq->create_flags & IB_CQ_TIMESTAMP);
repoll:
cqe = next_cqe_sw(cq);
@ -688,12 +688,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
MLX4_CQE_OPCODE_ERROR;
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
is_send)) {
pr_warn("Completion for NOP opcode detected!\n");
return -EINVAL;
}
/* Resize CQ in progress */
if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
if (cq->resize_buf) {
@ -719,12 +713,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
*/
mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
be32_to_cpu(cqe->vlan_my_qpn));
if (unlikely(!mqp)) {
pr_warn("CQ %06x with entry for unknown QPN %06x\n",
cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
return -EINVAL;
}
*cur_qp = to_mibqp(mqp);
}
@ -737,11 +725,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
/* SRQ is also in the radix tree */
msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
srq_num);
if (unlikely(!msrq)) {
pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
cq->mcq.cqn, srq_num);
return -EINVAL;
}
}
if (is_send) {
@ -781,7 +764,6 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
/* fall through */
case MLX4_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
@ -811,14 +793,11 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
case MLX4_OPCODE_BIND_MW:
wc->opcode = IB_WC_BIND_MW;
break;
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
case MLX4_OPCODE_FMR:
wc->opcode = IB_WC_FAST_REG_MR;
wc->opcode = IB_WC_REG_MR;
break;
case MLX4_OPCODE_LOCAL_INVAL:
wc->opcode = IB_WC_LOCAL_INV;
@ -849,35 +828,20 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
break;
}
is_eth = (rdma_port_get_link_layer(wc->qp->device,
(*cur_qp)->port) ==
IB_LINK_LAYER_ETHERNET);
if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
return use_tunnel_data
(*cur_qp, cq, wc, tail, cqe,
rdma_port_get_link_layer
(wc->qp->device,
(*cur_qp)->port) ==
IB_LINK_LAYER_ETHERNET);
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
use_tunnel_data(*cur_qp, cq, wc, tail, cqe,
is_eth);
return 0;
}
}
if (timestamp_en) {
/* currently, only CQ_CREATE_WITH_TIMESTAMPING_RAW is
* supported. CQ_CREATE_WITH_TIMESTAMPING_SYS isn't
* supported */
if (cq->create_flags & IB_CQ_TIMESTAMP_TO_SYS_TIME) {
wc->ts.timestamp = 0;
} else {
wc->ts.timestamp =
((u64)(be32_to_cpu(cqe->timestamp_16_47)
+ !cqe->timestamp_0_15) << 16)
| be16_to_cpu(cqe->timestamp_0_15);
wc->wc_flags |= IB_WC_WITH_TIMESTAMP;
}
} else {
wc->wc_flags |= IB_WC_WITH_SLID;
wc->slid = be16_to_cpu(cqe->rlid);
}
wc->slid = be16_to_cpu(cqe->rlid);
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
@ -885,27 +849,21 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (!timestamp_en) {
if (rdma_port_get_link_layer(wc->qp->device,
(*cur_qp)->port) ==
IB_LINK_LAYER_ETHERNET)
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
else
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
wc->wc_flags |= IB_WC_WITH_SL;
}
if ((be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_VLAN_PRESENT_MASK) && !timestamp_en) {
wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
MLX4_CQE_VID_MASK;
wc->wc_flags |= IB_WC_WITH_VLAN;
if (is_eth) {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
if (be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_CVLAN_PRESENT_MASK) {
wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
MLX4_CQE_VID_MASK;
} else {
wc->vlan_id = 0xffff;
}
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
} else {
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
wc->vlan_id = 0xffff;
}
if (!timestamp_en) {
memcpy(wc->smac, cqe->smac, 6);
wc->wc_flags |= IB_WC_WITH_SMAC;
}
}
return 0;
@ -917,24 +875,25 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
struct mlx4_ib_qp *cur_qp = NULL;
unsigned long flags;
int npolled;
int err = 0;
struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
spin_lock_irqsave(&cq->lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
mlx4_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
goto out;
}
for (npolled = 0; npolled < num_entries; ++npolled) {
err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
if (err)
if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
break;
}
mlx4_cq_set_ci(&cq->mcq);
out:
spin_unlock_irqrestore(&cq->lock, flags);
if (err == 0 || err == -EAGAIN)
return npolled;
else
return err;
return npolled;
}
int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
@ -942,7 +901,7 @@ int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
mlx4_cq_arm(&to_mcq(ibcq)->mcq,
(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
to_mdev(ibcq->device)->priv_uar.map,
to_mdev(ibcq->device)->uar_map,
MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
return 0;

View File

@ -1,116 +0,0 @@
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "mlx4_ib.h"
#include "mlx4_exp.h"
#include <dev/mlx4/qp.h>
/*
 * Experimental device query: fill props->base through
 * mlx4_ib_query_device() and derive the experimental attributes from it.
 *
 *  - inline_recv_sz is max_rq_sg data segments worth of bytes.
 *  - The QPG capability (and UD_RSS, when QPG is present) is reported in
 *    device_cap_flags2; QPG, UD_RSS and UD_TSS are cleared from the base
 *    device_cap_flags.
 *  - max_rss_tbl_sz is copied from the base attributes when positive.
 *
 * Returns the result of mlx4_ib_query_device().
 * NOTE(review): the derived fields are filled in even when that call
 * fails - confirm callers check the return value before using @props.
 */
int mlx4_ib_exp_query_device(struct ib_device *ibdev,
			     struct ib_exp_device_attr *props)
{
	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
	struct ib_device_attr *attr = &props->base;
	int err;

	err = mlx4_ib_query_device(ibdev, &props->base);

	props->exp_comp_mask = IB_EXP_DEVICE_ATTR_INLINE_RECV_SZ;
	props->inline_recv_sz = mdev->dev->caps.max_rq_sg *
				sizeof(struct mlx4_wqe_data_seg);

	/* move RSS device cap from device_cap to device_cap_flags2 */
	props->device_cap_flags2 = 0;
	if (attr->device_cap_flags & IB_DEVICE_QPG) {
		props->device_cap_flags2 |= IB_EXP_DEVICE_QPG;
		if (attr->device_cap_flags & IB_DEVICE_UD_RSS)
			props->device_cap_flags2 |= IB_EXP_DEVICE_UD_RSS;
	}
	attr->device_cap_flags &= ~(IB_DEVICE_QPG |
				    IB_DEVICE_UD_RSS |
				    IB_DEVICE_UD_TSS);

	if (attr->max_rss_tbl_sz > 0) {
		props->max_rss_tbl_sz = attr->max_rss_tbl_sz;
		props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
	} else {
		props->max_rss_tbl_sz = 0;
		props->exp_comp_mask &= ~IB_EXP_DEVICE_ATTR_RSS_TBL_SZ;
	}

	if (props->device_cap_flags2)
		props->exp_comp_mask |= IB_EXP_DEVICE_ATTR_CAP_FLAGS2;

	return err;
}
/*
* Experimental functions
*/
/*
 * Create a QP through mlx4_ib_create_qp(), optionally with inline receive.
 *
 * Inline receive is used only when the QP has a receive queue, the caller
 * asked for a non-zero max_inl_recv and a PD was supplied; otherwise
 * max_inl_recv is forced to 0.  A non-zero max_inl_recv without udata is
 * rejected with -EINVAL.  On success with inline receive enabled,
 * init_attr->max_inl_recv is updated to the size actually provided.
 */
struct ib_qp *mlx4_ib_exp_create_qp(struct ib_pd *pd,
				    struct ib_exp_qp_init_attr *init_attr,
				    struct ib_udata *udata)
{
	struct ib_qp *ibqp;
	int inline_recv;
	int wqe_bytes;

	if (init_attr->max_inl_recv && !udata)
		return ERR_PTR(-EINVAL);

	inline_recv = mlx4_ib_qp_has_rq((struct ib_qp_init_attr *)init_attr) &&
	    init_attr->max_inl_recv && pd;

	if (!inline_recv) {
		init_attr->max_inl_recv = 0;
	} else {
		wqe_bytes = roundup_pow_of_two(max(1U, init_attr->cap.max_recv_sge)) *
		    sizeof(struct mlx4_wqe_data_seg);
		if (wqe_bytes < init_attr->max_inl_recv) {
			struct mlx4_ib_dev *mdev = to_mdev(pd->device);

			/*
			 * The requested receive WQE is too small for the inline
			 * data: clamp the request to what the device supports
			 * and grow max_recv_sge to match.
			 */
			init_attr->max_inl_recv = min(init_attr->max_inl_recv,
			    (u32)(mdev->dev->caps.max_rq_sg *
			    sizeof(struct mlx4_wqe_data_seg)));
			init_attr->cap.max_recv_sge =
			    roundup_pow_of_two(init_attr->max_inl_recv) /
			    sizeof(struct mlx4_wqe_data_seg);
		}
	}

	ibqp = mlx4_ib_create_qp(pd, (struct ib_qp_init_attr *)init_attr, udata);
	if (!IS_ERR(ibqp) && inline_recv) {
		struct mlx4_ib_qp *mqp = to_mqp(ibqp);

		/* Report back the inline size derived from the RQ WQE size. */
		mqp->max_inlr_data = 1 << mqp->rq.wqe_shift;
		init_attr->max_inl_recv = mqp->max_inlr_data;
	}

	return ibqp;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -51,6 +51,10 @@
pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
(group)->name, group->demux->port, ## arg)
#define mcg_debug_group(group, format, arg...) \
pr_debug("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
(group)->name, (group)->demux->port, ## arg)
#define mcg_error_group(group, format, arg...) \
pr_err(" %16s: " format, (group)->name, ## arg)
@ -92,7 +96,7 @@ struct ib_sa_mcmember_data {
u8 scope_join_state;
u8 proxy_join;
u8 reserved[2];
};
} __packed __aligned(4);
struct mcast_group {
struct ib_sa_mcmember_data rec;
@ -206,17 +210,19 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
{
struct mlx4_ib_dev *dev = ctx->dev;
struct ib_ah_attr ah_attr;
unsigned long flags;
spin_lock(&dev->sm_lock);
spin_lock_irqsave(&dev->sm_lock, flags);
if (!dev->sm_ah[ctx->port - 1]) {
/* port is not yet Active, sm_ah not ready */
spin_unlock(&dev->sm_lock);
spin_unlock_irqrestore(&dev->sm_lock, flags);
return -EAGAIN;
}
mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
spin_unlock(&dev->sm_lock);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, 0, mad);
spin_unlock_irqrestore(&dev->sm_lock, flags);
return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
&ah_attr, NULL, 0xffff, mad);
}
static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
@ -483,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group)
if (!group->members[i])
leave_state |= (1 << i);
return leave_state & (group->rec.scope_join_state & 7);
return leave_state & (group->rec.scope_join_state & 0xf);
}
static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@ -558,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
} else
mcg_warn_group(group, "DRIVER BUG\n");
} else if (group->state == MCAST_LEAVE_SENT) {
if (group->rec.scope_join_state & 7)
group->rec.scope_join_state &= 0xf8;
if (group->rec.scope_join_state & 0xf)
group->rec.scope_join_state &= 0xf0;
group->state = MCAST_IDLE;
mutex_unlock(&group->lock);
if (release_group(group, 1))
@ -599,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
static int handle_join_req(struct mcast_group *group, u8 join_mask,
struct mcast_req *req)
{
u8 group_join_state = group->rec.scope_join_state & 7;
u8 group_join_state = group->rec.scope_join_state & 0xf;
int ref = 0;
u16 status;
struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@ -685,11 +691,11 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
u8 cur_join_state;
resp_join_state = ((struct ib_sa_mcmember_data *)
group->response_sa_mad.data)->scope_join_state & 7;
cur_join_state = group->rec.scope_join_state & 7;
group->response_sa_mad.data)->scope_join_state & 0xf;
cur_join_state = group->rec.scope_join_state & 0xf;
if (method == IB_MGMT_METHOD_GET_RESP) {
/* successful join */
/* successfull join */
if (!cur_join_state && resp_join_state)
--rc;
} else if (!resp_join_state)
@ -705,7 +711,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
req = list_first_entry(&group->pending_list, struct mcast_req,
group_list);
sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
req_join_state = sa_data->scope_join_state & 0x7;
req_join_state = sa_data->scope_join_state & 0xf;
/* For a leave request, we will immediately answer the VF, and
* update our internal counters. The actual leave will be sent
@ -742,14 +748,11 @@ static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx
__be64 tid,
union ib_gid *new_mgid)
{
struct mcast_group *group = NULL, *cur_group;
struct mcast_group *group = NULL, *cur_group, *n;
struct mcast_req *req;
struct list_head *pos;
struct list_head *n;
mutex_lock(&ctx->mcg_table_lock);
list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
group = list_entry(pos, struct mcast_group, mgid0_list);
list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) {
mutex_lock(&group->lock);
if (group->last_req_tid == tid) {
if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
@ -963,8 +966,8 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
mutex_lock(&group->lock);
if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
mutex_unlock(&group->lock);
mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
port, slave, MAX_PEND_REQS_PER_FUNC);
mcg_debug_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
port, slave, MAX_PEND_REQS_PER_FUNC);
release_group(group, 0);
kfree(req);
return -ENOMEM;
@ -1046,7 +1049,7 @@ int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
atomic_set(&ctx->tid, 0);
sprintf(name, "mlx4_ib_mcg%d", ctx->port);
ctx->mcg_wq = create_singlethread_workqueue(name);
ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
if (!ctx->mcg_wq)
return -ENOMEM;
@ -1247,7 +1250,7 @@ void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
int mlx4_ib_mcg_init(void)
{
clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM);
if (!clean_wq)
return -ENOMEM;

View File

@ -35,6 +35,8 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/atomic64.h>
#include "mlx4_ib.h"
static u32 convert_access(int acc)
@ -43,71 +45,25 @@ static u32 convert_access(int acc)
(acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
(acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
(acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
(acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
(acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
MLX4_PERM_LOCAL_READ;
}
/* No suuport for Shared MR feature */
#if 0
static ssize_t shared_mr_proc_read(struct file *file,
char __user *buffer,
size_t len,
loff_t *offset)
static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
return -ENOSYS;
switch (type) {
case IB_MW_TYPE_1: return MLX4_MW_TYPE_1;
case IB_MW_TYPE_2: return MLX4_MW_TYPE_2;
default: return -1;
}
}
static ssize_t shared_mr_proc_write(struct file *file,
const char __user *buffer,
size_t len,
loff_t *offset)
{
return -ENOSYS;
}
static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
{
struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
struct mlx4_shared_mr_info *smr_info =
(struct mlx4_shared_mr_info *)pde->data;
/* Prevent any mapping not on start of area */
if (vma->vm_pgoff != 0)
return -EINVAL;
return ib_umem_map_to_vma(smr_info->umem,
vma);
}
static const struct file_operations shared_mr_proc_ops = {
.owner = THIS_MODULE,
.read = shared_mr_proc_read,
.write = shared_mr_proc_write,
.mmap = shared_mr_mmap
};
static mode_t convert_shared_access(int acc)
{
return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) |
(acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) |
(acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) |
(acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
(acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) |
(acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
}
#endif
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx4_ib_mr *mr;
int err;
mr = kzalloc(sizeof *mr, GFP_KERNEL);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
@ -134,420 +90,78 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
return ERR_PTR(err);
}
/*
 * Emit the MTT entries covering one contiguous DMA block.
 *
 * The block [cur_start_addr, cur_start_addr + len) is widened to mtt_size
 * boundaries on both sides and one entry per mtt_size is appended to
 * pages[].  Whenever pages[] holds a full page worth of entries it is
 * flushed with mlx4_write_mtt() and *start_index / *npages are advanced.
 *
 * Returns 0 on success, -EINVAL if the widened length is not a multiple of
 * mtt_size, or the error returned by mlx4_write_mtt().
 */
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size,
					u64 mtt_shift,
					u64 len,
					u64 cur_start_addr,
					u64 *pages,
					int *start_index,
					int *npages)
{
	u64 block_end = cur_start_addr + len;
	u64 block_end_aligned;
	u64 nr_entries;
	int idx;
	int err = 0;

	/* Account for the leading and trailing slack up to mtt_size alignment. */
	len += (cur_start_addr & (mtt_size-1ULL));
	block_end_aligned = round_up(block_end, mtt_size);
	len += (block_end_aligned - block_end);

	if (len & (mtt_size-1ULL)) {
		WARN(1 ,
		     "write_block: len %llx is not aligned to mtt_size %llx\n",
		     (unsigned long long)len, (unsigned long long)mtt_size);
		return -EINVAL;
	}

	nr_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address down to mtt_size so that an MR which
	 * starts in the middle of an MTT record is still covered.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);

	for (idx = 0; idx < nr_entries; ++idx) {
		pages[*npages] = cur_start_addr + (mtt_size * idx);
		(*npages)++;

		/* Keep accumulating until a full page of entries is ready. */
		if (*npages != PAGE_SIZE / sizeof(u64))
			continue;

		/* Hand mlx4_write_mtt() a chunk of the size it prefers. */
		err = mlx4_write_mtt(dev->dev, mtt, *start_index,
				     *npages, pages);
		if (err)
			return err;

		(*start_index) += *npages;
		*npages = 0;
	}

	return 0;
}
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
u64 *pages;
u64 len = 0;
int i, k, entry;
int n;
int len;
int err = 0;
u64 mtt_size;
u64 cur_start_addr = 0;
u64 mtt_shift;
int start_index = 0;
int npages = 0;
struct scatterlist *sg;
int i;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
mtt_shift = mtt->page_shift;
mtt_size = 1ULL << mtt_shift;
i = n = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
if (cur_start_addr + len ==
sg_dma_address(sg)) {
/* still the same block */
len += sg_dma_len(sg);
continue;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> mtt->page_shift;
for (k = 0; k < len; ++k) {
pages[i++] = sg_dma_address(sg) +
umem->page_size * k;
/*
* Be friendly to mlx4_write_mtt() and
* pass it chunks of appropriate size.
*/
if (i == PAGE_SIZE / sizeof (u64)) {
err = mlx4_write_mtt(dev->dev, mtt, n,
i, pages);
if (err)
goto out;
n += i;
i = 0;
}
/* A new block is started ...*/
/* If len is malaligned, write an extra mtt entry to
cover the misaligned area (round up the division)
*/
err = mlx4_ib_umem_write_mtt_block(dev,
mtt, mtt_size, mtt_shift,
len, cur_start_addr,
pages,
&start_index,
&npages);
if (err)
goto out;
cur_start_addr =
sg_dma_address(sg);
len = sg_dma_len(sg);
}
}
/* Handle the last block */
if (len > 0) {
/* If len is malaligned, write an extra mtt entry to cover
the misaligned area (round up the division)
*/
err = mlx4_ib_umem_write_mtt_block(dev,
mtt, mtt_size, mtt_shift,
len, cur_start_addr,
pages,
&start_index,
&npages);
if (err)
goto out;
}
if (npages)
err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
if (i)
err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
out:
free_page((unsigned long) pages);
return err;
}
/* Return ilog2() of the lowest set bit of ptr, i.e. its natural alignment shift. */
static inline u64 alignment_of(u64 ptr)
{
	/* x & ~(x - 1) isolates the least significant set bit. */
	u64 lowest_set_bit = ptr & (~(ptr-1));

	return ilog2(lowest_set_bit);
}
/*
 * Reduce block_shift so that both the start of the next block and the end
 * of the current block are aligned to (1 << block_shift).
 *
 * Returns the (possibly reduced) shift.
 */
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
				       u64 current_block_end,
				       u64 block_shift)
{
	u64 low_bits;

	/*
	 * If the next block starts less aligned than the current shift,
	 * fall back to the alignment of its lowest set address bit.
	 */
	low_bits = next_block_start & ((1ULL << block_shift) - 1ULL);
	if (low_bits != 0)
		block_shift = alignment_of(next_block_start);

	/*
	 * Apply the same check to the end of the current block, using the
	 * shift as updated above.
	 */
	low_bits = current_block_end & ((1ULL << block_shift) - 1ULL);
	if (low_bits != 0)
		block_shift = alignment_of(current_block_end);

	return block_shift;
}
/*
 * Calculate the optimal MTT size (as a shift) based on contiguous pages.
 *
 * The scatter list of umem is walked and merged into contiguous blocks.
 * The shift starts at MLX4_MAX_MTT_SHIFT and is reduced so that:
 *   - it does not exceed the alignment of the bits that differ between the
 *     page-aligned start_va and the first DMA address, and
 *   - the start and end of every split between two blocks are aligned to it.
 *
 * @umem:        memory region whose DMA scatter list is examined.
 * @start_va:    user virtual address of the start of the region.
 * @num_of_mtts: on the normal path, receives the number of MTT entries of
 *               the chosen size needed to cover the region, including the
 *               misaligned head of the first block and tail of the last
 *               block.  It is left untouched when the walk stops early
 *               because the shift dropped to the minimum (page) shift.
 *
 * Returns the chosen shift, never smaller than ilog2(umem->page_size).
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
				       u64 start_va,
				       int *num_of_mtts)
{
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 current_block_len = 0;
	u64 current_block_start = 0;
	u64 misalignment_bits;
	u64 first_block_start = 0;
	u64 last_block_end = 0;
	u64 total_len = 0;
	u64 last_block_aligned_end = 0;
	u64 min_shift = ilog2(umem->page_size);
	struct scatterlist *sg;
	int i;
	u64 next_block_start;
	u64 current_block_end;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		/*
		 * Initialization: save the first chunk start as the
		 * current_block_start.  A block means contiguous pages.
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			first_block_start = current_block_start =
				sg_dma_address(sg);
			/*
			 * Find the bits that differ between the physical
			 * address and the virtual address for the start of
			 * the MR.  umem_get aligned start_va to a page
			 * boundary, so align it the same way here.
			 *
			 * misalignment_bits handles the case of a single
			 * memory region, where the rest of the logic will not
			 * reduce the block size.  Using a block size bigger
			 * than the alignment of these bits could select the
			 * virtual page number instead of the physical one and
			 * access the wrong data.
			 */
			misalignment_bits =
			(start_va & (~(((u64)(umem->page_size))-1ULL)))
						^ current_block_start;
			block_shift = min(alignment_of(misalignment_bits)
				, block_shift);
		}

		/*
		 * Check whether this scatter entry continues the previous
		 * one.
		 */
		next_block_start =
			sg_dma_address(sg);
		current_block_end = current_block_start
			+ current_block_len;

		/* A split (non-contiguous) between two blocks. */
		if (current_block_end != next_block_start) {
			block_shift = mlx4_ib_umem_calc_block_mtt(
					next_block_start,
					current_block_end,
					block_shift);

			/*
			 * Once the minimum (page) shift is reached there is
			 * nothing left to optimize; stop the walk.
			 */
			if (block_shift <= min_shift)
				goto end;

			/* Account for the block that just ended. */
			total_len += current_block_len;

			/* Start a new block. */
			current_block_start = next_block_start;
			current_block_len =
				sg_dma_len(sg);
			continue;
		}

		/*
		 * The scatter entry is another part of the current block;
		 * grow the block.  An entry can be larger than a page since
		 * the DMA mapping may merge several pages together.
		 */
		current_block_len +=
			sg_dma_len(sg);
	}

	/* Account for the last block in the total length. */
	total_len += current_block_len;
	/* Add the misalignment of the start of the first block. */
	total_len += (first_block_start & ((1ULL<<block_shift)-1ULL));
	last_block_end = current_block_start+current_block_len;
	/*
	 * Use a 64-bit constant for the alignment, as the other masks in this
	 * function do: block_shift may be as large as MLX4_MAX_MTT_SHIFT and
	 * shifting a plain int by that amount can overflow.
	 */
	last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	WARN((total_len & ((1ULL<<block_shift)-1ULL)),
		" misaligned total length detected (%llu, %llu)!",
		(unsigned long long)total_len, (unsigned long long)block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/* Never return less than the minimum shift; warn instead. */
		WARN(1,
		"mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
		(unsigned long long)block_shift);

		block_shift = min_shift;
	}
	return block_shift;
}
/* No suuport for Shared MR */
#if 0
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
struct proc_dir_entry *mr_proc_entry;
mode_t mode = S_IFREG;
char name_buff[16];
mode |= convert_shared_access(access_flags);
sprintf(name_buff, "%X", mr_id);
mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
mr->smr_info->mr_id = mr_id;
mr->smr_info->umem = mr->umem;
mr_proc_entry = proc_create_data(name_buff, mode,
mlx4_mrs_dir_entry,
&shared_mr_proc_ops,
mr->smr_info);
if (!mr_proc_entry) {
pr_err("prepare_shared_mr failed via proc\n");
kfree(mr->smr_info);
return -ENODEV;
}
current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
mr_proc_entry->size = mr->umem->length;
return 0;
}
/* Return 1 if any of the shared-MR permission bits is set in access_flags, else 0. */
static int is_shared_mr(int access_flags)
{
	int shared_bits = IB_ACCESS_SHARED_MR_USER_READ |
			  IB_ACCESS_SHARED_MR_USER_WRITE |
			  IB_ACCESS_SHARED_MR_GROUP_READ |
			  IB_ACCESS_SHARED_MR_GROUP_WRITE |
			  IB_ACCESS_SHARED_MR_OTHER_READ |
			  IB_ACCESS_SHARED_MR_OTHER_WRITE;

	return (access_flags & shared_bits) != 0;
}
/*
 * Tear down the sharing state of an MR: remove its proc entry (named after
 * the MR id in hex) and free mr->smr_info.
 *
 * Once the parent shared MR is deregistered it can no longer be shared; its
 * id may be handed out again by a later registration.
 */
static void free_smr_info(struct mlx4_ib_mr *mr)
{
	char proc_name[16];

	sprintf(proc_name, "%X", mr->smr_info->mr_id);

	/*
	 * Per the original author's note, remove_proc_entry() waits for any
	 * operation in progress on the file, so no extra synchronization is
	 * needed before the shared umem is released.
	 */
	remove_proc_entry(proc_name, mlx4_mrs_dir_entry);

	kfree(mr->smr_info);
	mr->smr_info = NULL;
}
#endif
/*
 * Invalidation callback registered for peer-memory umems.
 *
 * The first caller to bump mr->invalidated frees the MR, releases the umem
 * and signals mr->invalidation_comp.  Any later caller only marks the
 * invalidation as in flight and returns.
 */
static void mlx4_invalidate_umem(void *invalidation_cookie,
				 struct ib_umem *umem,
				 unsigned long addr, size_t size)
{
	struct mlx4_ib_mr *mmr = (struct mlx4_ib_mr *)invalidation_cookie;

	/* Called under the client peer lock, so its resources are race protected. */
	if (atomic_inc_return(&mmr->invalidated) > 1) {
		/* Someone else already started tearing this MR down. */
		umem->invalidation_ctx->inflight_invalidation = 1;
		return;
	}

	umem->invalidation_ctx->peer_callback = 1;
	mlx4_mr_free(to_mdev(mmr->ibmr.device)->dev, &mmr->mmr);
	ib_umem_release(umem);
	complete(&mmr->invalidation_comp);
}
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata,
int mr_id)
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
int shift;
int err;
int n;
struct ib_peer_memory_client *ib_peer_mem;
mr = kzalloc(sizeof *mr, GFP_KERNEL);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->umem = ib_umem_get_ex(pd->uobject->context, start, length,
access_flags, 0, 1);
/* Force registering the memory as writable. */
/* Used for memory re-registeration. HCA protects the access */
mr->umem = ib_umem_get(pd->uobject->context, start, length,
access_flags | IB_ACCESS_LOCAL_WRITE, 0);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
ib_peer_mem = mr->umem->ib_peer_mem;
n = ib_umem_page_count(mr->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
&n);
shift = ilog2(mr->umem->page_size);
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
convert_access(access_flags), n, shift, &mr->mmr);
if (err)
goto err_umem;
@ -560,46 +174,9 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
/* No suuport for Shared MR */
#if 0
/* Check whether MR should be shared */
if (is_shared_mr(access_flags)) {
/* start address and length must be aligned to page size in order
to map a full page and preventing leakage of data */
if (mr->umem->offset || (length & ~PAGE_MASK)) {
err = -EINVAL;
goto err_mr;
}
err = prepare_shared_mr(mr, access_flags, mr_id);
if (err)
goto err_mr;
}
#endif
if (ib_peer_mem) {
if (access_flags & IB_ACCESS_MW_BIND) {
/* Prevent binding MW on peer clients.
* mlx4_invalidate_umem must be void,
* therefore, mlx4_mr_free should not fail
* when using peer clients. */
err = -ENOSYS;
pr_err("MW is not supported with peer memory client");
goto err_smr;
}
init_completion(&mr->invalidation_comp);
ib_umem_activate_invalidation_notifier(mr->umem,
mlx4_invalidate_umem, mr);
}
atomic_set(&mr->invalidated, 0);
return &mr->ibmr;
err_smr:
/* No suuport for Shared MR */
#if 0
if (mr->smr_info)
free_smr_info(mr);
#endif
err_mr:
(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
@ -612,44 +189,159 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
return ERR_PTR(err);
}
/*
 * Re-register a user MR, changing its PD, access rights and/or translation.
 *
 * @mr:              MR to modify.
 * @flags:           IB_MR_REREG_PD / IB_MR_REREG_ACCESS / IB_MR_REREG_TRANS
 *                   bits selecting what to change.
 * @start, @length:  new user range (used with IB_MR_REREG_TRANS).
 * @virt_addr:       new I/O virtual address (used with IB_MR_REREG_TRANS).
 * @mr_access_flags: new access flags.
 * @pd:              new protection domain (used with IB_MR_REREG_PD).
 * @udata:           unused here.
 *
 * Returns 0 on success or a negative errno.  On a failure in the
 * IB_MR_REREG_TRANS path the old umem has already been released;
 * mmr->umem is left NULL so that a later dereg does not release it again.
 */
int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
			  u64 start, u64 length, u64 virt_addr,
			  int mr_access_flags, struct ib_pd *pd,
			  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(mr->device);
	struct mlx4_ib_mr *mmr = to_mmr(mr);
	struct mlx4_mpt_entry *mpt_entry;
	struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
	int err;

	/* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
	 * we assume that the calls can't run concurrently. Otherwise, a
	 * race exists.
	 */
	err =  mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);

	if (err)
		return err;

	if (flags & IB_MR_REREG_PD) {
		err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
					   to_mpd(pd)->pdn);

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
					       convert_access(mr_access_flags));

		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_TRANS) {
		int shift;
		int n;

		/* Drop the old translation and pinned memory first. */
		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
		ib_umem_release(mmr->umem);
		mmr->umem = ib_umem_get(mr->uobject->context, start, length,
					mr_access_flags |
					IB_ACCESS_LOCAL_WRITE,
					0);
		if (IS_ERR(mmr->umem)) {
			err = PTR_ERR(mmr->umem);
			/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
			mmr->umem = NULL;
			goto release_mpt_entry;
		}
		n = ib_umem_page_count(mmr->umem);
		shift = ilog2(mmr->umem->page_size);

		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
					      virt_addr, length, n, shift,
					      *pmpt_entry);
		if (err) {
			ib_umem_release(mmr->umem);
			/* Do not leave a stale pointer for dereg to release again. */
			mmr->umem = NULL;
			goto release_mpt_entry;
		}
		mmr->mmr.iova       = virt_addr;
		mmr->mmr.size       = length;

		err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
		if (err) {
			mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
			ib_umem_release(mmr->umem);
			/* Same as above: the umem is gone, forget the pointer. */
			mmr->umem = NULL;
			goto release_mpt_entry;
		}
	}

	/* If we couldn't transfer the MR to the HCA, just remember to
	 * return a failure. But dereg_mr will free the resources.
	 */
	err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
	if (!err && (flags & IB_MR_REREG_ACCESS))
		mmr->mmr.access = mr_access_flags;

release_mpt_entry:
	mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);

	return err;
}
static int
mlx4_alloc_priv_pages(struct ib_device *device,
struct mlx4_ib_mr *mr,
int max_pages)
{
int ret;
/* Ensure that size is aligned to DMA cacheline
* requirements.
* max_pages is limited to MLX4_MAX_FAST_REG_PAGES
* so page_map_size will never cross PAGE_SIZE.
*/
mr->page_map_size = roundup(max_pages * sizeof(u64),
MLX4_MR_PAGES_ALIGN);
/* Prevent cross page boundary allocation. */
mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
if (!mr->pages)
return -ENOMEM;
mr->page_map = dma_map_single(device->dma_device, mr->pages,
mr->page_map_size, DMA_TO_DEVICE);
if (dma_mapping_error(device->dma_device, mr->page_map)) {
ret = -ENOMEM;
goto err;
}
return 0;
err:
free_page((unsigned long)mr->pages);
return ret;
}
/*
 * Unmap and free the private page list of an MR, if it has one.
 * mr->pages is reset to NULL so a repeated call is a no-op.
 */
static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
	struct ib_device *ibdev;

	if (!mr->pages)
		return;

	ibdev = mr->ibmr.device;
	dma_unmap_single(ibdev->dma_device, mr->page_map,
			 mr->page_map_size, DMA_TO_DEVICE);
	free_page((unsigned long)mr->pages);
	mr->pages = NULL;
}
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
struct ib_umem *umem = mr->umem;
int ret;
/* No suuport for Shared MR */
#if 0
if (mr->smr_info)
free_smr_info(mr);
#endif
if (atomic_inc_return(&mr->invalidated) > 1) {
wait_for_completion(&mr->invalidation_comp);
goto end;
}
mlx4_free_priv_pages(mr);
ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
if (ret) {
/* Error is not expected here, except when memory windows
* are bound to MR which is not supported with
* peer memory clients */
atomic_set(&mr->invalidated, 0);
if (ret)
return ret;
}
if (!umem)
goto end;
ib_umem_release(mr->umem);
end:
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
return 0;
}
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mw *mw;
@ -659,7 +351,8 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
if (!mw)
return ERR_PTR(-ENOMEM);
err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, (enum mlx4_mw_type)type, &mw->mmw);
err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
to_mlx4_type(type), &mw->mmw);
if (err)
goto err_free;
@ -680,28 +373,6 @@ struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
return ERR_PTR(err);
}
int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind)
{
struct ib_send_wr wr;
struct ib_send_wr *bad_wr;
int ret;
memset(&wr, 0, sizeof(wr));
wr.opcode = IB_WR_BIND_MW;
wr.wr_id = mw_bind->wr_id;
wr.send_flags = mw_bind->send_flags;
wr.wr.bind_mw.mw = mw;
wr.wr.bind_mw.bind_info = mw_bind->bind_info;
wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);
ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
if (!ret)
mw->rkey = wr.wr.bind_mw.rkey;
return ret;
}
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
struct mlx4_ib_mw *mw = to_mmw(ibmw);
@ -712,85 +383,51 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
return 0;
}
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
int max_page_list_len)
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
int err;
mr = kzalloc(sizeof *mr, GFP_KERNEL);
if (mr_type != IB_MR_TYPE_MEM_REG ||
max_num_sg > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
max_page_list_len, 0, &mr->mmr);
max_num_sg, 0, &mr->mmr);
if (err)
goto err_free;
err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
if (err)
goto err_free_mr;
mr->max_pages = max_num_sg;
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_mr;
goto err_free_pl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
mr->umem = NULL;
return &mr->ibmr;
err_mr:
err_free_pl:
mlx4_free_priv_pages(mr);
err_free_mr:
(void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
kfree(mr);
return ERR_PTR(err);
}
struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
int page_list_len)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_fast_reg_page_list *mfrpl;
int size = page_list_len * sizeof (u64);
if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
return ERR_PTR(-EINVAL);
mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
if (!mfrpl)
return ERR_PTR(-ENOMEM);
mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
if (!mfrpl->ibfrpl.page_list)
goto err_free;
mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
size, &mfrpl->map,
GFP_KERNEL);
if (!mfrpl->mapped_page_list)
goto err_free;
WARN_ON(mfrpl->map & 0x3f);
return &mfrpl->ibfrpl;
err_free:
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
return ERR_PTR(-ENOMEM);
}
/*
 * Release a fast-registration page list: the DMA-coherent copy, the
 * CPU-side page array and the containing structure.
 */
void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx4_ib_dev *mdev = to_mdev(page_list->device);
	struct mlx4_ib_fast_reg_page_list *frpl = to_mfrpl(page_list);
	int bytes = page_list->max_page_list_len * sizeof (u64);

	dma_free_coherent(&mdev->dev->pdev->dev, bytes,
			  frpl->mapped_page_list, frpl->map);
	kfree(frpl->ibfrpl.page_list);
	kfree(frpl);
}
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
struct ib_fmr_attr *fmr_attr)
{
@ -883,3 +520,34 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
return err;
}
/*
 * Append one page address to the MR's private page list.
 *
 * The address is stored big-endian with MLX4_MTT_FLAG_PRESENT set.
 * Returns 0 on success or -ENOMEM when the list already holds max_pages
 * entries.
 */
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx4_ib_mr *mmr = to_mmr(ibmr);

	if (unlikely(mmr->npages == mmr->max_pages))
		return -ENOMEM;

	mmr->pages[mmr->npages] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);
	mmr->npages++;

	return 0;
}
/*
 * Build the MR's private page list from a scatterlist.
 *
 * The page count is reset, the page-list buffer is synced for CPU access,
 * ib_sg_to_pages() fills it through mlx4_set_page(), and the buffer is
 * synced back for the device.  Returns the value of ib_sg_to_pages().
 */
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx4_ib_mr *mmr = to_mmr(ibmr);
	int mapped;

	mmr->npages = 0;

	/* The CPU owns the page list while it is being rewritten. */
	ib_dma_sync_single_for_cpu(ibmr->device, mmr->page_map,
				   mmr->page_map_size, DMA_TO_DEVICE);

	mapped = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

	/* Hand the updated list back to the device. */
	ib_dma_sync_single_for_device(ibmr->device, mmr->page_map,
				      mmr->page_map_size, DMA_TO_DEVICE);

	return mapped;
}

File diff suppressed because it is too large Load Diff

View File

@ -36,7 +36,7 @@
#include <linux/slab.h>
#include "mlx4_ib.h"
#include "user.h"
#include <rdma/mlx4-abi.h>
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
} else {
err = mlx4_db_alloc(dev->dev, &srq->db, 0);
err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
if (err)
goto err_srq;
*srq->db.db = 0;
if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
GFP_KERNEL)) {
err = -ENOMEM;
goto err_db;
}
@ -165,14 +166,19 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_buf;
err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
if (err)
goto err_mtt;
srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
GFP_KERNEL | __GFP_NOWARN);
if (!srq->wrid) {
err = -ENOMEM;
goto err_mtt;
srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
GFP_KERNEL, 0 /*PAGE_KERNEL*/);
if (!srq->wrid) {
err = -ENOMEM;
goto err_mtt;
}
}
}
@ -203,7 +209,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (pd->uobject)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else
kfree(srq->wrid);
kvfree(srq->wrid);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
@ -280,7 +286,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq)
mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
ib_umem_release(msrq->umem);
} else {
kfree(msrq->wrid);
kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
@ -315,8 +321,15 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
int err = 0;
int nreq;
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibsrq->device);
spin_lock_irqsave(&srq->lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
goto out;
}
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
@ -361,6 +374,7 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
out:
spin_unlock_irqrestore(&srq->lock, flags);

View File

@ -46,21 +46,17 @@
static ssize_t show_admin_alias_guid(struct device *dev,
struct device_attribute *attr, char *buf)
{
int record_num;/*0-15*/
int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
__be64 sysadmin_ag_val;
record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
mlx4_ib_iov_dentry->entry_num,
port->num);
return sprintf(buf, "%llx\n",
(long long)be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[8 * guid_index_in_rec]));
return sprintf(buf, "%llx\n", (long long)be64_to_cpu(sysadmin_ag_val));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
unsigned long long sysadmin_ag_val;
unsigned long flags;
record_num = mlx4_ib_iov_dentry->entry_num / 8;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
pr_err("GUID 0 block 0 is RO\n");
return count;
}
spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
sscanf(buf, "%llx", &sysadmin_ag_val);
*(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
all_rec_per_port[record_num].
@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
/* Change the state to be pending for update */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE ;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_SET;
switch (sysadmin_ag_val) {
case MLX4_GUID_FOR_DELETE_VAL:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_DELETE;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
/* The sysadmin requests the SM to re-assign */
case MLX4_NOT_SET_GUID:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_DRIVER_ASSIGN;
break;
/* The sysadmin requests a specific value.*/
default:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
}
mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
mlx4_ib_iov_dentry->entry_num,
port->num);
/* set the record index */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
|= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
return count;
@ -375,7 +355,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
char base_name[9];
/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
strlcpy(name, pci_name(dev->dev->pdev), max);
strlcpy(name, pci_name(dev->dev->persist->pdev), max);
strncpy(base_name, name, 8); /*till xxxx:yy:*/
base_name[8] = '\0';
/* with no ARI only 3 last bits are used so when the fn is higher than 8
@ -389,8 +369,10 @@ struct mlx4_port {
struct mlx4_ib_dev *dev;
struct attribute_group pkey_group;
struct attribute_group gid_group;
u8 port_num;
struct device_attribute enable_smi_admin;
struct device_attribute smi_enabled;
int slave;
u8 port_num;
};
@ -558,6 +540,101 @@ alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
return NULL;
}
/*
 * sysfs "show" callback installed for the smi_enabled attribute.
 *
 * Recovers the owning struct mlx4_port from the embedded attribute and
 * prints "1\n" into buf when mlx4_vf_smi_enabled() reports non-zero for
 * this port's slave and port number, "0\n" otherwise.
 *
 * Returns the character count produced by sprintf().
 */
static ssize_t sysfs_show_smi_enabled(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	struct mlx4_port *port =
		container_of(attr, struct mlx4_port, smi_enabled);
	int flag;

	flag = mlx4_vf_smi_enabled(port->dev->dev, port->slave,
				   port->port_num) ? 1 : 0;

	return sprintf(buf, "%d\n", flag);
}
/*
 * sysfs "show" callback installed for the enable_smi_admin attribute.
 *
 * Recovers the owning struct mlx4_port from the embedded attribute and
 * prints "1\n" into buf when mlx4_vf_get_enable_smi_admin() reports
 * non-zero for this port's slave and port number, "0\n" otherwise.
 *
 * Returns the character count produced by sprintf().
 */
static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
					   struct device_attribute *attr,
					   char *buf)
{
	struct mlx4_port *port =
		container_of(attr, struct mlx4_port, enable_smi_admin);
	int flag;

	flag = mlx4_vf_get_enable_smi_admin(port->dev->dev, port->slave,
					    port->port_num) ? 1 : 0;

	return sprintf(buf, "%d\n", flag);
}
/*
 * sysfs "store" callback installed for the enable_smi_admin attribute.
 *
 * Parses one integer from buf with "%i" and accepts only 0 or 1.  The
 * value is handed to mlx4_vf_set_enable_smi_admin() for this port's
 * slave and port number.
 *
 * Returns count on success, or -EINVAL when the input does not parse,
 * is outside {0, 1}, or the setter reports a non-zero result.
 */
static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
					    struct device_attribute *attr,
					    const char *buf, size_t count)
{
	struct mlx4_port *port =
		container_of(attr, struct mlx4_port, enable_smi_admin);
	int enable;

	if (sscanf(buf, "%i", &enable) != 1)
		return -EINVAL;

	if (enable != 0 && enable != 1)
		return -EINVAL;

	if (mlx4_vf_set_enable_smi_admin(port->dev->dev, port->slave,
					 port->port_num, enable) != 0)
		return -EINVAL;

	return count;
}
static int add_vf_smi_entries(struct mlx4_port *p)
{
int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
IB_LINK_LAYER_ETHERNET;
int ret;
/* do not display entries if eth transport, or if master */
if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
return 0;
sysfs_attr_init(&p->smi_enabled.attr);
p->smi_enabled.show = sysfs_show_smi_enabled;
p->smi_enabled.store = NULL;
p->smi_enabled.attr.name = "smi_enabled";
p->smi_enabled.attr.mode = 0444;
ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
if (ret) {
pr_err("failed to create smi_enabled\n");
return ret;
}
sysfs_attr_init(&p->enable_smi_admin.attr);
p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
p->enable_smi_admin.attr.name = "enable_smi_admin";
p->enable_smi_admin.attr.mode = 0644;
ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
if (ret) {
pr_err("failed to create enable_smi_admin\n");
sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
return ret;
}
return 0;
}
/*
 * Remove the "smi_enabled" and "enable_smi_admin" sysfs files from
 * p->kobj.
 *
 * Applies the same skip conditions as add_vf_smi_entries(): nothing is
 * removed when the port's link layer is Ethernet or when p->slave is
 * the master function.
 */
static void remove_vf_smi_entries(struct mlx4_port *p)
{
	if (rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
	    IB_LINK_LAYER_ETHERNET)
		return;
	if (p->slave == mlx4_master_func_num(p->dev->dev))
		return;

	sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
	sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
}
static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
{
struct mlx4_port *p;
@ -581,16 +658,14 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
goto err_alloc;
p->pkey_group.name = "pkey_idx";
if (is_eth)
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, NULL,
dev->dev->caps.pkey_table_len[port_num]);
else
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
if (!p->pkey_group.attrs)
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey,
is_eth ? NULL : store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
if (!p->pkey_group.attrs) {
ret = -ENOMEM;
goto err_alloc;
}
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
@ -598,13 +673,19 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
p->gid_group.name = "gid_idx";
p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
if (!p->gid_group.attrs)
if (!p->gid_group.attrs) {
ret = -ENOMEM;
goto err_free_pkey;
}
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
ret = add_vf_smi_entries(p);
if (ret)
goto err_free_gid;
list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
return 0;
@ -630,6 +711,7 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
int port;
struct kobject *p, *t;
struct mlx4_port *mport;
struct mlx4_active_ports actv_ports;
get_name(dev, name, slave, sizeof name);
@ -652,7 +734,11 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
goto err_ports;
}
actv_ports = mlx4_get_active_ports(dev->dev, slave);
for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
if (!test_bit(port - 1, actv_ports.ports))
continue;
err = add_port(dev, port, slave);
if (err)
goto err_add;
@ -667,6 +753,7 @@ static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
mport = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &mport->pkey_group);
sysfs_remove_group(p, &mport->gid_group);
remove_vf_smi_entries(mport);
kobject_put(p);
}
kobject_put(dev->dev_ports_parent[slave]);
@ -688,7 +775,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return 0;
for (i = 0; i <= device->dev->num_vfs; ++i)
for (i = 0; i <= device->dev->persist->num_vfs; ++i)
register_one_pkey_tree(device, i);
return 0;
@ -703,7 +790,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return;
for (slave = device->dev->num_vfs; slave >= 0; --slave) {
for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) {
list_for_each_entry_safe(p, t,
&device->pkeys.pkey_port_list[slave],
entry) {
@ -711,6 +798,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
port = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
remove_vf_smi_entries(port);
kobject_put(p);
kobject_put(device->dev_ports_parent[slave]);
}
@ -739,7 +827,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
dev->ports_parent =
kobject_create_and_add("ports",
kobject_get(dev->iov_parent));
if (!dev->iov_parent) {
if (!dev->ports_parent) {
ret = -ENOMEM;
goto err_ports;
}

View File

@ -1,107 +0,0 @@
/*
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef MLX4_IB_USER_H
#define MLX4_IB_USER_H
#include <linux/types.h>
/*
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
#define MLX4_IB_UVERBS_ABI_VERSION 4
/*
* Make sure that all structs defined in this file remain laid out so
* that they pack the same way on 32-bit and 64-bit architectures (to
* avoid incompatibility between 32-bit userspace and 64-bit kernels).
* In particular do not use pointer types -- pass pointers in __u64
* instead.
*/
/*
 * Three-field ucontext response: one __u32 followed by two __u16,
 * 8 bytes with no implicit padding.  Unlike
 * struct mlx4_ib_alloc_ucontext_resp it has no dev_caps or cqe_size
 * member.
 * NOTE(review): the _v3 suffix presumably ties this layout to
 * MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION (3) -- confirm against the
 * code that fills it in.
 */
struct mlx4_ib_alloc_ucontext_resp_v3 {
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
};
/*
 * Ucontext response carrying the same three fields as
 * struct mlx4_ib_alloc_ucontext_resp_v3, with dev_caps placed in front
 * and cqe_size appended.  Members are __u32, __u32, __u16, __u16,
 * __u32: 16 bytes, each member naturally aligned.
 * NOTE(review): presumably the layout used for
 * MLX4_IB_UVERBS_ABI_VERSION (4) -- confirm against the code that
 * fills it in.
 */
struct mlx4_ib_alloc_ucontext_resp {
__u32 dev_caps;
__u32 qp_tab_size;
__u16 bf_reg_size;
__u16 bf_regs_per_page;
__u32 cqe_size;
};
/*
 * Response carrying a single __u32 value, pdn, followed by a __u32
 * reserved word (8 bytes total).
 * NOTE(review): pdn is presumably the protection domain number, and
 * reserved presumably exists to keep the size a multiple of 8 per the
 * packing rule stated earlier in this header -- confirm.
 */
struct mlx4_ib_alloc_pd_resp {
__u32 pdn;
__u32 reserved;
};
/*
 * Two __u64 members, 16 bytes.
 * NOTE(review): this header requires pointers to be passed as __u64,
 * so buf_addr and db_addr presumably hold user-space addresses of the
 * CQ buffer and its doorbell record -- confirm against the consumer.
 */
struct mlx4_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
};
/*
 * Response carrying a single __u32 value, cqn, followed by a __u32
 * reserved word (8 bytes total).
 * NOTE(review): cqn is presumably the completion queue number --
 * confirm against the code that fills it in.
 */
struct mlx4_ib_create_cq_resp {
__u32 cqn;
__u32 reserved;
};
/*
 * Single __u64 member, 8 bytes.
 * NOTE(review): buf_addr presumably holds the user-space address of
 * the replacement CQ buffer, passed as __u64 per this header's pointer
 * rule -- confirm against the consumer.
 */
struct mlx4_ib_resize_cq {
__u64 buf_addr;
};
/*
 * Two __u64 members, 16 bytes; same member names and types as
 * struct mlx4_ib_create_cq.
 * NOTE(review): buf_addr and db_addr presumably hold user-space
 * addresses of the SRQ buffer and its doorbell record -- confirm
 * against the consumer.
 */
struct mlx4_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
};
/*
 * Response carrying a single __u32 value, srqn, followed by a __u32
 * reserved word (8 bytes total).
 * NOTE(review): srqn is presumably the shared receive queue number --
 * confirm against the code that fills it in.
 */
struct mlx4_ib_create_srq_resp {
__u32 srqn;
__u32 reserved;
};
/*
 * Two __u64 members followed by three __u8 members and a 5-byte
 * reserved array.  The eight single-byte members bring the struct to
 * 24 bytes, a multiple of 8, so no trailing padding is needed.
 * NOTE(review): buf_addr and db_addr presumably hold user-space
 * addresses; the log_ prefix presumably marks log2-encoded send-queue
 * parameters -- confirm against the consumer.
 */
struct mlx4_ib_create_qp {
__u64 buf_addr;
__u64 db_addr;
__u8 log_sq_bb_count;
__u8 log_sq_stride;
__u8 sq_no_prefetch;
__u8 reserved[5];
};
#endif /* MLX4_IB_USER_H */

View File

@ -9,7 +9,6 @@ SRCS= device_if.h bus_if.h vnode_if.h pci_if.h \
mlx4_ib_sysfs.c \
mlx4_ib_ah.c \
mlx4_ib_cq.c \
mlx4_ib_exp.c \
mlx4_ib_doorbell.c \
mlx4_ib_mad.c \
mlx4_ib_main.c \
@ -20,6 +19,7 @@ SRCS= device_if.h bus_if.h vnode_if.h pci_if.h \
mlx4_ib_cm.c
CFLAGS+= -I${SRCTOP}/sys/ofed/include
CFLAGS+= -I${SRCTOP}/sys/ofed/include/uapi
CFLAGS+= -I${SRCTOP}/sys/compat/linuxkpi/common/include
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
CFLAGS+= -DINET6 -DINET