Bring in the Mellanox implementation of the iSER (iSCSI over RDMA) initiator,
written by Sagi Grimberg <sagig at mellanox.com> and Max Gurtovoy
<maxg at mellanox.com>.

This code comes from https://github.com/sagigrimberg/iser-freebsd, branch
iser-rebase-11-current-r291993.

It's not connected to the build just yet; it still needs some tweaks to adapt
to my changes to the iSCSI infrastructure.

Big thanks to Mellanox for their support for FreeBSD!

Obtained from:	Mellanox Technologies
MFC after:	1 month
Relnotes:	yes
This commit is contained in:
parent f159d7d6f0
commit 4814a0a4ce
582	sys/dev/iser/icl_iser.c	Normal file
@@ -0,0 +1,582 @@
/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static icl_conn_new_pdu_t iser_conn_new_pdu;
static icl_conn_pdu_free_t iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t iser_conn_pdu_queue;
static icl_conn_handoff_t iser_conn_handoff;
static icl_conn_free_t iser_conn_free;
static icl_conn_close_t iser_conn_close;
static icl_conn_release_t iser_conn_release;
static icl_conn_connect_t iser_conn_connect;
static icl_conn_connected_t iser_conn_connected;
static icl_conn_task_setup_t iser_conn_task_setup;
static icl_conn_task_done_t iser_conn_task_done;
static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
	KOBJMETHOD(icl_conn_release, iser_conn_release),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
	KOBJMETHOD(icl_conn_connected, iser_conn_connected),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu:       iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow for SCSI
 * error handling TMFs. So for a TMF we should acquire the
 * state mutex to avoid dereferencing the IB device, which
 * may have already been terminated (racing teardown sequence).
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
	struct iser_tx_desc *tx_desc = &pdu->desc;
	struct iser_device *device = iser_conn->ib_conn.device;
	u64 dma_addr;
	int ret = 0;

	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
	    ISER_HEADERS_LEN, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
		ret = -ENOMEM;
		goto out;
	}

	tx_desc->mapped = true;
	tx_desc->dma_addr = dma_addr;
	tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
	return (ret);
}

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
	    request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) {
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
	}

	return (0);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
	/* If we have receive data, copy it to the upper layer buffer */
	if (ip->ip_data_mbuf)
		memcpy(addr, ip->ip_data_mbuf + off, len);
}

/*
 * Allocate an icl_pdu with an empty BHS to be filled up by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_iser_pdu *iser_pdu;
	struct icl_pdu *ip;
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (iser_pdu == NULL) {
		ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
		return (NULL);
	}

	iser_pdu->iser_conn = iser_conn;
	ip = &iser_pdu->icl_pdu;
	ip->ip_conn = ic;
	ip->ip_bhs = &iser_pdu->desc.iscsi_header;

	return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	uma_zfree(icl_pdu_zone, iser_pdu);
}

size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}
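
/*
 * Worked example (not from the original commit): the BHS carries
 * DataSegmentLength as a 24-bit big-endian field spread over three bytes.
 * For bhs_data_segment_len = { 0x00, 0x10, 0x00 } the shifts above yield
 * (0x00 << 16) | (0x10 << 8) | 0x00 = 0x1000, i.e. a 4096-byte segment.
 */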

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
	bool is_control = false;

	switch (opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		is_control = true;
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		is_control = false;
		break;
	default:
		ISER_ERR("unknown opcode %d", opcode);
	}

	return (is_control);
}

void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO);
	if (!iser_conn) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "flush_lock", NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
	ic->ic_driver = strdup("iser", M_TEMP);
	ic->ic_iser = true;

	return (ic);
}

void
iser_conn_free(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int cmds_max)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started\n",
		    iser_conn, iser_conn->state);
		goto out;
	}

	/*
	 * In a discovery session there is no need to allocate rx descriptors
	 * or post receive work requests.
	 */
	if (ic->ic_session_type_discovery(ic))
		goto out;

	error = iser_alloc_rx_descriptors(iser_conn, cmds_max);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);
}

/**
 * Frees all conn objects
 */
void
iser_conn_release(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_conn *curr, *tmp;

	mtx_lock(&ig.connlist_mutex);
	/*
	 * Search for the iser connection in the global list.
	 * It may not be there in case of a failure in the connection
	 * establishment stage.
	 */
	list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
		if (iser_conn == curr) {
			ISER_WARN("found iser_conn %p", iser_conn);
			list_del(&iser_conn->conn_list);
		}
	}
	mtx_unlock(&ig.connlist_mutex);

	/*
	 * In case we are reconnecting or removing the session, we need to
	 * release IB resources (which is safe to call more than once).
	 */
	sx_xlock(&iser_conn->state_mutex);
	iser_free_ib_conn_res(iser_conn, true);
	sx_xunlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
}

void
iser_conn_close(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * In case the iser connection is waiting on the condition variable
	 * (state PENDING) and we try to close it before connection
	 * establishment, we need to signal it to continue releasing the
	 * connection properly.
	 */
	if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);
}

int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	sx_xlock(&iser_conn->state_mutex);
	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn,
	    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
}

/**
 * Called with the session spinlock held.
 * No need to lock the state mutex on an advisory check.
 **/
bool
iser_conn_connected(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	return (iser_conn->state == ISER_CONN_UP);
}

int
iser_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio,
    uint32_t *task_tagp, void **prvp, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	*prvp = ip;
	iser_pdu->csio = csio;

	return (0);
}

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_IN],
		    DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_OUT],
		    DMA_TO_DEVICE);
	}

	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
		    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
}

static u_int32_t
iser_hba_misc()
{
	return (PIM_UNMAPPED);
}

static int
iser_limits(size_t *limitp)
{
	*limitp = 128 * 1024;

	return (0);
}

static int
icl_iser_load(void)
{
	int error;

	ISER_DBG("Starting iSER datamover...");

	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	/* FIXME: Check rc */

	refcount_init(&icl_iser_ncons, 0);

	error = icl_register("iser", 0, iser_limits, iser_new_conn, iser_hba_misc);
	KASSERT(error == 0, ("failed to register iser"));

	memset(&ig, 0, sizeof(struct iser_global));

	/* device init is called only after the first addr resolution */
	sx_init(&ig.device_list_mutex, "global_device_lock");
	INIT_LIST_HEAD(&ig.device_list);
	mtx_init(&ig.connlist_mutex, "global_conn_lock", NULL, MTX_DEF);
	INIT_LIST_HEAD(&ig.connlist);
	sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

	return (error);
}

static int
icl_iser_unload(void)
{
	ISER_DBG("Removing iSER datamover...");

	if (icl_iser_ncons != 0)
		return (EBUSY);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

	icl_unregister("iser");

	uma_zdestroy(icl_pdu_zone);

	return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (icl_iser_load());
	case MOD_UNLOAD:
		return (icl_iser_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, iscsi, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);
547	sys/dev/iser/icl_iser.h	Normal file
@@ -0,0 +1,547 @@
/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef ICL_ISER_H
#define ICL_ISER_H

/*
 * iSCSI Common Layer for RDMA.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <sys/taskqueue.h>
#include <sys/bio.h>
#include <vm/uma.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>
#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_fmr_pool.h>
#include <rdma/rdma_cm.h>


#define ISER_DBG(X, ...) \
	do { \
		if (unlikely(iser_debug > 2)) \
			printf("DEBUG: %s: " X "\n", \
			    __func__, ## __VA_ARGS__); \
	} while (0)

#define ISER_INFO(X, ...) \
	do { \
		if (unlikely(iser_debug > 1)) \
			printf("INFO: %s: " X "\n", \
			    __func__, ## __VA_ARGS__); \
	} while (0)

#define ISER_WARN(X, ...) \
	do { \
		if (unlikely(iser_debug > 0)) { \
			printf("WARNING: %s: " X "\n", \
			    __func__, ## __VA_ARGS__); \
		} \
	} while (0)

#define ISER_ERR(X, ...) \
	printf("ERROR: %s: " X "\n", __func__, ## __VA_ARGS__)

#define ISER_VER	0x10
#define ISER_WSV	0x08
#define ISER_RSV	0x04

#define ISER_FASTREG_LI_WRID	0xffffffffffffffffULL
#define ISER_BEACON_WRID	0xfffffffffffffffeULL

#define SHIFT_4K	12
#define SIZE_4K		(1ULL << SHIFT_4K)
#define MASK_4K		(~(SIZE_4K-1))

/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE	(0x80000 >> SHIFT_4K)
#define ISER_DEF_XMIT_CMDS_MAX	256

/* the max RX (recv) WR supported by the iSER QP is defined by
 * max_recv_wr = commands_max + recv_beacon */
#define ISER_QP_MAX_RECV_DTOS	(ISER_DEF_XMIT_CMDS_MAX + 1)
#define ISER_MIN_POSTED_RX	(ISER_DEF_XMIT_CMDS_MAX >> 2)

/* QP settings */
/* Maximal bounds on received asynchronous PDUs */
#define ISER_MAX_RX_MISC_PDUS	4 /* NOOP_IN(2), ASYNC_EVENT(2) */
#define ISER_MAX_TX_MISC_PDUS	6 /* NOOP_OUT(2), TEXT(1), SCSI_TMFUNC(2), LOGOUT(1) */

/* the max TX (send) WR supported by the iSER QP is defined by
 * max_send_wr = T * (1 + D) + C; D is how many inflight dataouts we expect
 * at most for a SCSI command. The tx posting & completion handling code
 * supports an -EAGAIN scheme where tx is suspended till the QP has room
 * for more send WRs. D=8 comes from 64K/8K. */

#define ISER_INFLIGHT_DATAOUTS	8

/* the send_beacon increases max_send_wr by 1 */
#define ISER_QP_MAX_REQ_DTOS	(ISER_DEF_XMIT_CMDS_MAX * \
				 (1 + ISER_INFLIGHT_DATAOUTS) + \
				 ISER_MAX_TX_MISC_PDUS + \
				 ISER_MAX_RX_MISC_PDUS + 1)

#define ISER_GET_MAX_XMIT_CMDS(send_wr) ((send_wr \
					 - ISER_MAX_TX_MISC_PDUS \
					 - ISER_MAX_RX_MISC_PDUS - 1) / \
					 (1 + ISER_INFLIGHT_DATAOUTS))
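
/*
 * Sanity check (not from the original commit): with the defaults above,
 * ISER_QP_MAX_REQ_DTOS = 256 * (1 + 8) + 6 + 4 + 1 = 2315 send WRs, and
 * ISER_GET_MAX_XMIT_CMDS() is the inverse: given a QP supporting send_wr
 * send work requests, it recovers T = (send_wr - 6 - 4 - 1) / 9.
 */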

#define ISER_WC_BATCH_COUNT	16
#define ISER_SIGNAL_CMD_COUNT	32

/* Maximal QPs recommended per CQ. In case we use more QPs per CQ we might
 * encounter a CQ overrun state. */
#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_LEN		(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_LEN		(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_CQ_LEN		(ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
				 ISCSI_ISER_MAX_CONN)

#define ISER_ZBVA_NOT_SUPPORTED		0x80
#define ISER_SEND_W_INV_NOT_SUPPORTED	0x40

#define icl_to_iser_conn(ic) \
	container_of(ic, struct iser_conn, icl_conn)
#define icl_to_iser_pdu(ip) \
	container_of(ip, struct icl_iser_pdu, icl_pdu)

/**
 * struct iser_hdr - iSER header
 *
 * @flags:      flags support (zbva, remote_inv)
 * @rsvd:       reserved
 * @write_stag: write rkey
 * @write_va:   write virtual address
 * @read_stag:  read rkey
 * @read_va:    read virtual address
 */
struct iser_hdr {
	u8 flags;
	u8 rsvd[3];
	__be32 write_stag;
	__be64 write_va;
	__be32 read_stag;
	__be64 read_va;
} __attribute__((packed));

struct iser_cm_hdr {
	u8 flags;
	u8 rsvd[3];
} __packed;

/* Constant PDU lengths calculations */
#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + ISCSI_BHS_SIZE)
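
/*
 * Note (not from the original commit): assuming the usual layouts (a
 * packed 28-byte struct iser_hdr and the 48-byte iSCSI BHS),
 * ISER_HEADERS_LEN evaluates to 76 bytes.
 */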

#define ISER_RECV_DATA_SEG_LEN	128
#define ISER_RX_PAYLOAD_SIZE	(ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)

#define ISER_RX_LOGIN_SIZE	(ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)

enum iser_conn_state {
	ISER_CONN_INIT,		/* descriptor allocd, no conn */
	ISER_CONN_PENDING,	/* in the process of being established */
	ISER_CONN_UP,		/* up and running */
	ISER_CONN_TERMINATING,	/* in the process of being terminated */
	ISER_CONN_DOWN,		/* shut down */
	ISER_CONN_STATES_NUM
};

enum iser_task_status {
	ISER_TASK_STATUS_INIT = 0,
	ISER_TASK_STATUS_STARTED,
	ISER_TASK_STATUS_COMPLETED
};

enum iser_data_dir {
	ISER_DIR_IN = 0,	/* to initiator */
	ISER_DIR_OUT,		/* from initiator */
	ISER_DIRS_NUM
};

/**
 * struct iser_mem_reg - iSER memory registration info
 *
 * @sge:   memory region sg element
 * @rkey:  memory region remote key
 * @mem_h: pointer to registration context (FMR/Fastreg)
 */
struct iser_mem_reg {
	struct ib_sge sge;
	u32 rkey;
	void *mem_h;
};

enum iser_desc_type {
	ISCSI_TX_CONTROL,
	ISCSI_TX_SCSI_COMMAND,
	ISCSI_TX_DATAOUT
};

/**
 * struct iser_data_buf - iSER data buffer
 *
 * @sg:        pointer to the sg list
 * @size:      num entries of this sg
 * @data_len:  total buffer byte len
 * @dma_nents: returned by dma_map_sg
 * @copy_buf:  allocated copy buf for SGs unaligned
 *             for rdma which are copied
 * @orig_sg:   pointer to the original sg list (in case
 *             we used a copy)
 * @sg_single: SG-ified clone of a non SG SC or
 *             unaligned SG
 */
struct iser_data_buf {
	struct scatterlist sgl[ISCSI_ISER_SG_TABLESIZE];
	void *sg;
	unsigned int size;
	unsigned long data_len;
	unsigned int dma_nents;
	char *copy_buf;
	struct scatterlist *orig_sg;
	struct scatterlist sg_single;
};

/* fwd declarations */
struct iser_conn;
struct ib_conn;
struct iser_device;

/**
 * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
 *
 * @iser_header:  iser header
 * @iscsi_header: iscsi header (bhs)
 * @type:         command/control/dataout
 * @dma_addr:     header buffer dma_address
 * @tx_sg:        sg[0] points to iser/iscsi headers
 *                sg[1] optionally points to either of immediate data
 *                unsolicited data-out or control
 * @num_sge:      number of sges used on this TX task
 * @mapped:       indicates if the descriptor is dma mapped
 */
struct iser_tx_desc {
	struct iser_hdr iser_header;
	struct iscsi_bhs iscsi_header __attribute__((packed));
	enum iser_desc_type type;
	u64 dma_addr;
	struct ib_sge tx_sg[2];
	int num_sge;
	bool mapped;
};

#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
				 sizeof(u64) + sizeof(struct ib_sge)))
/**
 * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
 *
 * @iser_header:  iser header
 * @iscsi_header: iscsi header
 * @data:         received data segment
 * @dma_addr:     receive buffer dma address
 * @rx_sg:        ib_sge of receive buffer
 * @pad:          for sense data TODO: Modify to maximum sense length supported
 */
struct iser_rx_desc {
	struct iser_hdr iser_header;
	struct iscsi_bhs iscsi_header;
	char data[ISER_RECV_DATA_SEG_LEN];
	u64 dma_addr;
	struct ib_sge rx_sg;
	char pad[ISER_RX_PAD_SIZE];
} __attribute__((packed));
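
/*
 * Note (not from the original commit): ISER_RX_PAD_SIZE is chosen so the
 * whole descriptor pads out to 256 bytes; assuming an 8-byte dma_addr and
 * a 16-byte struct ib_sge, the pad is 256 - (204 + 8 + 16) = 28 bytes.
 */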

struct icl_iser_pdu {
	struct icl_pdu icl_pdu;
	struct iser_tx_desc desc;
	struct iser_conn *iser_conn;
	enum iser_task_status status;
	struct ccb_scsiio *csio;
	int command_sent;
	int dir[ISER_DIRS_NUM];
	struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
	struct iser_data_buf data[ISER_DIRS_NUM];
};

/**
 * struct iser_comp - iSER completion context
 *
 * @device:     pointer to device handle
 * @cq:         completion queue
 * @wcs:        work completion array
 * @tq:         taskqueue handle
 * @task:       task to run task_fn
 * @active_qps: Number of active QPs attached
 *              to completion context
 */
struct iser_comp {
	struct iser_device *device;
	struct ib_cq *cq;
	struct ib_wc wcs[ISER_WC_BATCH_COUNT];
	struct taskqueue *tq;
	struct task task;
	int active_qps;
};

/**
 * struct iser_device - iSER device handle
 *
 * @ib_device:     RDMA device
 * @pd:            Protection Domain for this device
 * @dev_attr:      Device attributes container
 * @mr:            Global DMA memory region
 * @event_handler: IB events handle routine
 * @ig_list:       entry in devices list
 * @refcount:      Reference counter, dominated by open iser connections
 * @comps_used:    Number of completion contexts used, Min between online
 *                 cpus and device max completion vectors
 * @comps:         Dynamically allocated array of completion handlers
 */
struct iser_device {
	struct ib_device *ib_device;
	struct ib_pd *pd;
	struct ib_device_attr dev_attr;
	struct ib_mr *mr;
	struct ib_event_handler event_handler;
	struct list_head ig_list;
	int refcount;
	int comps_used;
	struct iser_comp *comps;
};

/**
 * struct iser_reg_resources - Fast registration resources
 *
 * @mr:       memory region
 * @frpl:     fast reg page list
 * @mr_valid: is mr valid indicator
 */
struct iser_reg_resources {
	struct ib_mr *mr;
	struct ib_fast_reg_page_list *frpl;
	u8 mr_valid:1;
};

/**
 * struct fast_reg_descriptor - Fast registration descriptor
 *
 * @list: entry in connection fastreg pool
 * @rsc:  data buffer registration resources
 */
struct fast_reg_descriptor {
	struct list_head list;
	struct iser_reg_resources rsc;
};


/**
 * struct iser_beacon - beacon to signal all flush errors were drained
 *
 * @send:       send wr
 * @recv:       recv wr
 * @flush_lock: protects flush_cv
 * @flush_cv:   condition variable for beacon flush
 */
struct iser_beacon {
	union {
		struct ib_send_wr send;
		struct ib_recv_wr recv;
	};
	struct mtx flush_lock;
	struct cv flush_cv;
};

/**
 * struct ib_conn - Infiniband related objects
 *
 * @cma_id: rdma_cm connection manager handle
 * @qp:     Connection Queue-pair
 * @device: reference to iser device
 * @comp:   iser completion context
 */
struct ib_conn {
	struct rdma_cm_id *cma_id;
	struct ib_qp *qp;
	int post_recv_buf_count;
	u8 sig_count;
	struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
	struct iser_device *device;
	struct iser_comp *comp;
	struct iser_beacon beacon;
	struct mtx lock;
	union {
		struct {
			struct ib_fmr_pool *pool;
			struct iser_page_vec *page_vec;
		} fmr;
		struct {
			struct list_head pool;
			int pool_size;
		} fastreg;
	};
};

struct iser_conn {
	struct icl_conn icl_conn;
	struct ib_conn ib_conn;
	struct cv up_cv;
	struct list_head conn_list;
	struct sx state_mutex;
	enum iser_conn_state state;
	int qp_max_recv_dtos;
	int min_posted_rx;
	u16 max_cmds;
	char *login_buf;
	char *login_req_buf, *login_resp_buf;
	u64 login_req_dma, login_resp_dma;
	unsigned int rx_desc_head;
	struct iser_rx_desc *rx_descs;
	u32 num_rx_descs;
};

/**
 * struct iser_global: iSER global context
 *
 * @device_list_mutex: protects device_list
 * @device_list:       iser devices global list
 * @connlist_mutex:    protects connlist
 * @connlist:          iser connections global list
 * @desc_cache:        kmem cache for tx dataout
 * @close_conns_mutex: serializes conns closure
 */
struct iser_global {
	struct sx device_list_mutex;
	struct list_head device_list;
	struct mtx connlist_mutex;
	struct list_head connlist;
	struct sx close_conns_mutex;
};

extern struct iser_global ig;
extern int iser_debug;

void
iser_create_send_desc(struct iser_conn *, struct iser_tx_desc *);

int
iser_post_recvl(struct iser_conn *);

int
iser_post_recvm(struct iser_conn *, int);

int
iser_alloc_login_buf(struct iser_conn *iser_conn);

void
iser_free_login_buf(struct iser_conn *iser_conn);

int
iser_post_send(struct ib_conn *, struct iser_tx_desc *, bool);

void
iser_snd_completion(struct iser_tx_desc *, struct ib_conn *);

void
iser_rcv_completion(struct iser_rx_desc *, unsigned long,
    struct ib_conn *);

void
iser_pdu_free(struct icl_conn *, struct icl_pdu *);

struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags);

int
iser_alloc_rx_descriptors(struct iser_conn *, int);

void
iser_free_rx_descriptors(struct iser_conn *);

int
iser_initialize_headers(struct icl_iser_pdu *, struct iser_conn *);

int
iser_send_control(struct iser_conn *, struct icl_iser_pdu *);

int
iser_send_command(struct iser_conn *, struct icl_iser_pdu *);

int
iser_reg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir);

void
iser_unreg_rdma_mem(struct icl_iser_pdu *, enum iser_data_dir);

int
iser_create_fastreg_pool(struct ib_conn *, unsigned);

void
iser_free_fastreg_pool(struct ib_conn *);

int
iser_dma_map_task_data(struct icl_iser_pdu *,
    struct iser_data_buf *, enum iser_data_dir,
    enum dma_data_direction);

int
iser_conn_terminate(struct iser_conn *);

void
iser_free_ib_conn_res(struct iser_conn *, bool);

void
iser_dma_unmap_task_data(struct icl_iser_pdu *, struct iser_data_buf *,
    enum dma_data_direction);

int
iser_cma_handler(struct rdma_cm_id *, struct rdma_cm_event *);

#endif /* !ICL_ISER_H */
539	sys/dev/iser/iser_initiator.c	Normal file
@@ -0,0 +1,539 @@
/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");

/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Data size is stored in
 * task->data[ISER_DIR_IN].data_len; protection size
 * is stored in task->prot[ISER_DIR_IN].data_len.
 */
static int
iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
	    buf_in,
	    ISER_DIR_IN,
	    DMA_FROM_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
	if (err) {
		ISER_ERR("Failed to set up Data-IN RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];

	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Data size is stored in
 * task->data[ISER_DIR_OUT].data_len; protection size
 * is stored at task->prot[ISER_DIR_OUT].data_len.
 */
static int
iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
	    buf_out,
	    ISER_DIR_OUT,
	    DMA_TO_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
	if (err) {
		ISER_ERR("Failed to set up Data-out RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];

	hdr->flags |= ISER_WSV;
	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
	hdr->write_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* creates a new tx descriptor and adds header regd buffer */
void
iser_create_send_desc(struct iser_conn *iser_conn,
    struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
	    tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
	}
}

void
iser_free_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	if (!iser_conn->login_buf)
		return;

	if (iser_conn->login_req_dma)
		ib_dma_unmap_single(device->ib_device,
		    iser_conn->login_req_dma,
		    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	if (iser_conn->login_resp_dma)
		ib_dma_unmap_single(device->ib_device,
		    iser_conn->login_resp_dma,
		    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->login_buf, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->login_req_dma = 0;
	iser_conn->login_resp_dma = 0;
	iser_conn->login_buf = NULL;
}

int
iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_err, resp_err;

	BUG_ON(device == NULL);

	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
	    M_ISER_INITIATOR, M_WAITOK | M_ZERO);

	if (!iser_conn->login_buf)
		goto out_err;

	iser_conn->login_req_buf = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
	    ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
	    iser_conn->login_req_buf,
	    ISCSI_DEF_MAX_RECV_SEG_LEN,
	    DMA_TO_DEVICE);

	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
	    iser_conn->login_resp_buf,
	    ISER_RX_LOGIN_SIZE,
	    DMA_FROM_DEVICE);

	req_err = ib_dma_mapping_error(device->ib_device,
	    iser_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device,
	    iser_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			iser_conn->login_req_dma = 0;
		if (resp_err)
			iser_conn->login_resp_dma = 0;
		goto free_login_buf;
	}

	return (0);

free_login_buf:
	iser_free_login_buf(iser_conn);

out_err:
	ISER_DBG("unable to alloc or map login buf");
	return (ENOMEM);
}

int
iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge *rx_sg;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_conn->qp_max_recv_dtos = cmds_max;
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (iser_create_fastreg_pool(ib_conn, cmds_max))
		goto create_rdma_reg_res_failed;

	iser_conn->num_rx_descs = cmds_max;
	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
	    sizeof(struct iser_rx_desc), M_ISER_INITIATOR,
	    M_WAITOK | M_ZERO);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = iser_conn->rx_descs;

	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
		    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey = device->mr->lkey;
	}

	iser_conn->rx_desc_head = 0;

	return (0);

rx_desc_dma_map_failed:
	rx_desc = iser_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
		    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	free(iser_conn->rx_descs, M_ISER_INITIATOR);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_fastreg_pool(ib_conn);
create_rdma_reg_res_failed:
	ISER_ERR("failed allocating rx descriptors / data buffers");

	return (ENOMEM);
}

void
iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_free_fastreg_pool(ib_conn);

	rx_desc = iser_conn->rx_descs;
	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
		    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->rx_descs, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->rx_descs = NULL;
}

static void
iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = data_buf->data_len;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		offset = ((uintptr_t)buf) & ~PAGE_MASK;
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_buf(sg, buf, len);
		buf = (void *)(((u64)buf) + (u64)len);
	}

	data_buf->size = i;
	sg_mark_end(sg);
}


static void
iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = bp->bio_bcount;
	offset = bp->bio_ma_offset;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_page(sg, bp->bio_ma[i], len, offset);
		offset = 0;
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

static int
iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
{
	struct ccb_hdr *ccbh;
	int err = 0;

	ccbh = &csio->ccb_h;
	switch ((ccbh->flags & CAM_DATA_MASK)) {
	case CAM_DATA_BIO:
		iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf);
		break;
	case CAM_DATA_VADDR:
		/*
		 * Support KVA buffers for various scsi commands such as:
		 *  - REPORT_LUNS
		 *  - MODE_SENSE_6
		 *  - INQUIRY
		 *  - SERVICE_ACTION_IN.
		 * The data of these commands is always mapped into KVA.
		 */
		iser_buf_to_sg(csio->data_ptr, data_buf);
		break;
	default:
		ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
		err = EINVAL;
	}
	return (err);
}

static inline bool
iser_signal_comp(u8 sig_count)
{
	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}
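
/*
 * Note (not from the original commit): a send completion is requested only
 * on every ISER_SIGNAL_CMD_COUNT-th (32nd) command; sig_count is a u8, so
 * the counter simply wraps. Leaving the intermediate sends unsignaled
 * reduces completion-queue processing overhead.
 */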

int
iser_send_command(struct iser_conn *iser_conn,
    struct icl_iser_pdu *iser_pdu)
{
	struct iser_data_buf *data_buf;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
	struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header);
	struct ccb_scsiio *csio = iser_pdu->csio;
	int err = 0;
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		data_buf = &iser_pdu->data[ISER_DIR_IN];
	} else {
		data_buf = &iser_pdu->data[ISER_DIR_OUT];
	}

	data_buf->sg = csio->data_ptr;
	data_buf->data_len = csio->dxfer_len;

	if (likely(csio->dxfer_len)) {
		err = iser_csio_to_sg(csio, data_buf);
		if (unlikely(err))
			goto send_command_error;
	}

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		err = iser_prepare_read_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
		err = iser_prepare_write_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
	    iser_signal_comp(sig_count));
	if (!err)
		return (0);

send_command_error:
	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
	    hdr->bhssc_initiator_task_tag,
	    hdr->bhssc_expected_data_transfer_length,
	    err);
	return (err);
}

int
iser_send_control(struct iser_conn *iser_conn,
    struct icl_iser_pdu *iser_pdu)
{
	struct iser_tx_desc *mdesc;
	struct iser_device *device;
	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
	struct icl_conn *ic = &iser_conn->icl_conn;
	int err;

	mdesc = &iser_pdu->desc;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	if (datalen > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
		ib_dma_sync_single_for_cpu(device->ib_device,
		    iser_conn->login_req_dma, datalen,
		    DMA_TO_DEVICE);

		ib_dma_sync_single_for_device(device->ib_device,
		    iser_conn->login_req_dma, datalen,
		    DMA_TO_DEVICE);

		tx_dsg->addr = iser_conn->login_req_dma;
		tx_dsg->length = datalen;
		tx_dsg->lkey = device->mr->lkey;
		mdesc->num_sge = 2;
	}

	/* For a discovery session we re-use the login buffer */
	if (ic->ic_session_login_phase(ic) || ic->ic_session_type_discovery(ic)) {
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return (0);

send_control_error:
	ISER_ERR("conn %p failed err %d", iser_conn, err);

	return (err);
}

/**
 * iser_rcv_dto_completion - recv DTO completion
 */
void
iser_rcv_completion(struct iser_rx_desc *rx_desc,
    unsigned long rx_xfer_len,
    struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
	    ib_conn);
	struct icl_conn *ic = &iser_conn->icl_conn;
	struct icl_pdu *response;
	struct iscsi_bhs *hdr;
	u64 rx_dma;
	int rx_buflen;
	int outstanding, count, err;

	/* differentiate between the login PDU and all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
	    rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	response = iser_new_pdu(ic, M_NOWAIT);
	response->ip_bhs = hdr;
	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;

	/*
	 * In case we got data in the receive buffer, assign the ip_data_mbuf
	 * to the rx_buffer - later we'll copy it to upper layer buffers
	 */
	if (response->ip_data_len)
		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
	    rx_buflen, DMA_FROM_DEVICE);

	/* decrementing conn->post_recv_buf_count only --after-- freeing the
	 * task eliminates the need to worry about tasks which are completed
	 * in parallel to the execution of iser_conn_term. So the code that
	 * waits for the posted rx bufs refcount to become zero handles
	 * everything. */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		goto receive;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
		    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			ISER_ERR("posting %d rx bufs err %d", count, err);
	}

receive:
	(ic->ic_receive)(response);
}

void
iser_snd_completion(struct iser_tx_desc *tx_desc,
    struct ib_conn *ib_conn)
{
	struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
	struct iser_conn *iser_conn = iser_pdu->iser_conn;

	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL)
		iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
}
348	sys/dev/iser/iser_memory.c	Normal file
@@ -0,0 +1,348 @@
|
||||
/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc;

	mtx_lock(&ib_conn->lock);
	desc = list_first_entry(&ib_conn->fastreg.pool,
				struct fast_reg_descriptor, list);
	list_del(&desc->list);
	mtx_unlock(&ib_conn->lock);

	return (desc);
}

static void
iser_reg_desc_put(struct ib_conn *ib_conn,
		  struct fast_reg_descriptor *desc)
{
	mtx_lock(&ib_conn->lock);
	list_add(&desc->list, &ib_conn->fastreg.pool);
	mtx_unlock(&ib_conn->lock);
}

#define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be less
 * than the original due to possible compaction).
 *
 * we build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the weird case
 * where --few fragments of the same page-- are present in the SG as
 * consecutive elements. Also, it handles one entry SG.
 */
static int
iser_sg_to_page_vec(struct iser_data_buf *data,
		    struct ib_device *ibdev, u64 *pages,
		    int *offset, int *data_size)
{
	struct scatterlist *sg, *sgl = data->sgl;
	u64 start_addr, end_addr, page, chunk_start = 0;
	unsigned long total_sz = 0;
	unsigned int dma_len;
	int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

	/* compute the offset of first element */
	*offset = (u64) sgl[0].offset & ~MASK_4K;

	new_chunk = 1;
	cur_page = 0;
	for_each_sg(sgl, sg, data->dma_nents, i) {
		start_addr = ib_sg_dma_address(ibdev, sg);
		if (new_chunk)
			chunk_start = start_addr;
		dma_len = ib_sg_dma_len(ibdev, sg);
		end_addr = start_addr + dma_len;
		total_sz += dma_len;

		/* collect page fragments until aligned or end of SG list */
		if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;

		/* address of the first page in the contiguous chunk;
		   masking relevant for the very first SG entry,
		   which might be unaligned */
		page = chunk_start & MASK_4K;
		do {
			pages[cur_page++] = page;
			page += SIZE_4K;
		} while (page < end_addr);
	}

	*data_size = total_sz;

	return (cur_page);
}
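
A standalone sketch of the compaction above, driven by invented (address, length) pairs instead of a real scatterlist: two SG entries that are fragments of the same 4K page collapse into a single page-vector entry.

/* Sketch of the 4K page-vector compaction above (userspace C, invented
 * segments; SIZE_4K/MASK_4K defined locally to mirror the driver). */
#include <stdio.h>

#define SIZE_4K	4096UL
#define MASK_4K	(~(SIZE_4K - 1))

int
main(void)
{
	/* two fragments of the same 4K page, as consecutive SG entries */
	unsigned long addr[] = { 0x10000UL, 0x10800UL };
	unsigned long len[]  = { 0x800UL, 0x800UL };
	unsigned long pages[8], page, chunk_start = 0, end;
	int i, cur_page = 0, new_chunk = 1, nents = 2;

	for (i = 0; i < nents; i++) {
		if (new_chunk)
			chunk_start = addr[i];
		end = addr[i] + len[i];
		/* interior fragment ends mid-page: keep collecting */
		if ((end & ~MASK_4K) != 0 && i < nents - 1) {
			new_chunk = 0;
			continue;
		}
		new_chunk = 1;
		for (page = chunk_start & MASK_4K; page < end; page += SIZE_4K)
			pages[cur_page++] = page;
	}
	for (i = 0; i < cur_page; i++)
		printf("page[%d] = 0x%lx\n", i, pages[i]); /* one entry: 0x10000 */
	return (0);
}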

/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of entries which are aligned correctly. Supports
 * the case where consecutive SG elements are actually fragments of the same
 * physical page.
 */
static int
iser_data_buf_aligned_len(struct iser_data_buf *data, struct ib_device *ibdev)
{
	struct scatterlist *sg, *sgl, *next_sg = NULL;
	u64 start_addr, end_addr;
	int i, ret_len, start_check = 0;

	if (data->dma_nents == 1)
		return (1);

	sgl = data->sgl;
	start_addr = ib_sg_dma_address(ibdev, sgl);

	for_each_sg(sgl, sg, data->dma_nents, i) {
		if (start_check && !IS_4K_ALIGNED(start_addr))
			break;

		next_sg = sg_next(sg);
		if (!next_sg)
			break;

		end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
		start_addr = ib_sg_dma_address(ibdev, next_sg);

		if (end_addr == start_addr) {
			start_check = 0;
			continue;
		} else
			start_check = 1;

		if (!IS_4K_ALIGNED(end_addr))
			break;
	}
	ret_len = (next_sg) ? i : i+1;

	return (ret_len);
}

void
iser_dma_unmap_task_data(struct icl_iser_pdu *iser_pdu,
			 struct iser_data_buf *data,
			 enum dma_data_direction dir)
{
	struct ib_device *dev;

	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;
	ib_dma_unmap_sg(dev, data->sgl, data->size, dir);
}

static int
iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
	     struct iser_mem_reg *reg)
{
	struct scatterlist *sg = mem->sgl;

	reg->sge.lkey = device->mr->lkey;
	reg->rkey = device->mr->rkey;
	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);

	return (0);
}

/**
 * TODO: This should be a verb
 * iser_ib_inc_rkey - increments the key portion of the given rkey. Can be used
 * for calculating a new rkey for type 2 memory windows.
 * @rkey - the rkey to increment.
 */
static inline u32
iser_ib_inc_rkey(u32 rkey)
{
	const u32 mask = 0x000000ff;

	return (((rkey + 1) & mask) | (rkey & ~mask));
}
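
A standalone sketch of the increment above: only the low byte (the key portion) of the rkey changes, wrapping around without disturbing the index bits.

/* Sketch of the 8-bit key-portion increment above (plain userspace C). */
#include <stdio.h>
#include <stdint.h>

static uint32_t
inc_rkey(uint32_t rkey)
{
	const uint32_t mask = 0x000000ff;

	return (((rkey + 1) & mask) | (rkey & ~mask));
}

int
main(void)
{
	printf("0x%08x\n", inc_rkey(0x12345678)); /* 0x12345679 */
	printf("0x%08x\n", inc_rkey(0x123456ff)); /* 0x12345600: low byte wraps */
	return (0);
}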

static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
	u32 rkey;

	memset(inv_wr, 0, sizeof(*inv_wr));
	inv_wr->opcode = IB_WR_LOCAL_INV;
	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
	inv_wr->ex.invalidate_rkey = mr->rkey;

	rkey = iser_ib_inc_rkey(mr->rkey);
	ib_update_fast_reg_key(mr, rkey);
}

static int
iser_fast_reg_mr(struct icl_iser_pdu *iser_pdu,
		 struct iser_data_buf *mem,
		 struct iser_reg_resources *rsc,
		 struct iser_mem_reg *reg)
{
	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_send_wr fastreg_wr, inv_wr;
	struct ib_send_wr *bad_wr, *wr = NULL;
	int ret, offset, size, plen;

	/* if there is a single dma entry, the dma mr suffices */
	if (mem->dma_nents == 1)
		return iser_reg_dma(device, mem, reg);

	/* rsc is not null */
	plen = iser_sg_to_page_vec(mem, device->ib_device,
				   rsc->frpl->page_list,
				   &offset, &size);
	if (plen * SIZE_4K < size) {
		ISER_ERR("fast reg page_list too short to hold this SG");
		return (EINVAL);
	}

	if (!rsc->mr_valid) {
		iser_inv_rkey(&inv_wr, rsc->mr);
		wr = &inv_wr;
	}

	/* Prepare FASTREG WR */
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = rsc->frpl->page_list[0] + offset;
	fastreg_wr.wr.fast_reg.page_list = rsc->frpl;
	fastreg_wr.wr.fast_reg.page_list_len = plen;
	fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
	fastreg_wr.wr.fast_reg.length = size;
	fastreg_wr.wr.fast_reg.rkey = rsc->mr->rkey;
	fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
					       IB_ACCESS_REMOTE_WRITE |
					       IB_ACCESS_REMOTE_READ);

	if (!wr)
		wr = &fastreg_wr;
	else
		wr->next = &fastreg_wr;

	ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
	if (ret) {
		ISER_ERR("fast registration failed, ret:%d", ret);
		return (ret);
	}
	rsc->mr_valid = 0;

	reg->sge.lkey = rsc->mr->lkey;
	reg->rkey = rsc->mr->rkey;
	reg->sge.addr = rsc->frpl->page_list[0] + offset;
	reg->sge.length = size;

	return (ret);
}
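
The pairing above relies on the verbs convention that a single ib_post_send() accepts a linked list of work requests executed in order on the QP, so the LOCAL_INV (posted only when the MR still carries a stale key) always precedes the FAST_REG_MR. A minimal standalone sketch of that conditional list-building, with invented stand-in types:

/* Sketch of the conditional WR chaining used above; struct wr and its
 * opcode strings are invented stand-ins for the verbs types. */
#include <stdio.h>
#include <string.h>

struct wr { const char *opcode; struct wr *next; };

int
main(void)
{
	struct wr inv_wr, fastreg_wr, *w, *head = NULL;
	int mr_valid = 0;	/* pretend the MR holds a stale key */

	memset(&inv_wr, 0, sizeof(inv_wr));
	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	inv_wr.opcode = "LOCAL_INV";
	fastreg_wr.opcode = "FAST_REG_MR";

	if (!mr_valid)
		head = &inv_wr;
	if (head == NULL)
		head = &fastreg_wr;
	else
		head->next = &fastreg_wr;

	/* a single post call consumes the whole chain, in order */
	for (w = head; w != NULL; w = w->next)
		printf("posting %s\n", w->opcode);
	return (0);
}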

/**
 * iser_reg_rdma_mem - Registers memory intended for RDMA,
 * using a Fast Registration WR (if possible), obtaining the rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int
iser_reg_rdma_mem(struct icl_iser_pdu *iser_pdu,
		  enum iser_data_dir cmd_dir)
{
	struct ib_conn *ib_conn = &iser_pdu->iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;
	struct ib_device *ibdev = device->ib_device;
	struct iser_data_buf *mem = &iser_pdu->data[cmd_dir];
	struct iser_mem_reg *mem_reg = &iser_pdu->rdma_reg[cmd_dir];
	struct fast_reg_descriptor *desc = NULL;
	int err, aligned_len;

	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
	if (aligned_len != mem->dma_nents) {
		ISER_ERR("bounce buffer is not supported");
		return 1;
	}

	if (mem->dma_nents != 1) {
		desc = iser_reg_desc_get(ib_conn);
		mem_reg->mem_h = desc;
	}

	err = iser_fast_reg_mr(iser_pdu, mem, desc ? &desc->rsc : NULL,
			       mem_reg);
	if (err)
		goto err_reg;

	return (0);

err_reg:
	if (desc)
		iser_reg_desc_put(ib_conn, desc);

	return (err);
}
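
As the error path above makes explicit, this port rejects SG lists that fail the alignment check outright ("bounce buffer is not supported") rather than copying through an intermediate buffer, so iser_data_buf_aligned_len() acts as a gatekeeper rather than a hint.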

void
iser_unreg_rdma_mem(struct icl_iser_pdu *iser_pdu,
		    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_pdu->rdma_reg[cmd_dir];

	if (!reg->mem_h)
		return;

	iser_reg_desc_put(&iser_pdu->iser_conn->ib_conn,
			  reg->mem_h);
	reg->mem_h = NULL;
}

int
iser_dma_map_task_data(struct icl_iser_pdu *iser_pdu,
		       struct iser_data_buf *data,
		       enum iser_data_dir iser_dir,
		       enum dma_data_direction dma_dir)
{
	struct ib_device *dev;

	iser_pdu->dir[iser_dir] = 1;
	dev = iser_pdu->iser_conn->ib_conn.device->ib_device;

	data->dma_nents = ib_dma_map_sg(dev, data->sgl, data->size, dma_dir);
	if (data->dma_nents == 0) {
		ISER_ERR("dma_map_sg failed");
		return (EINVAL);
	}

	return (0);
}
965 sys/dev/iser/iser_verbs.c Normal file
@ -0,0 +1,965 @@
/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend");
static int iser_cq_poll_limit = 512;

static void
iser_cq_event_callback(struct ib_event *cause, void *context)
{
	ISER_ERR("got cq event %d", cause->event);
}

static void
iser_qp_event_callback(struct ib_event *cause, void *context)
{
	ISER_ERR("got qp event %d", cause->event);
}

static void
iser_event_handler(struct ib_event_handler *handler,
		   struct ib_event *event)
{
	ISER_ERR("async event %d on device %s port %d",
		 event->event, event->device->name,
		 event->element.port_num);
}

/**
 * is_iser_tx_desc - Indicate if the completion wr_id
 * is a TX descriptor or not.
 * @iser_conn: iser connection
 * @wr_id: completion WR identifier
 *
 * Since we cannot rely on the wc opcode in FLUSH errors
 * we must work around it by checking if the wr_id address
 * falls in the iser connection rx_descs buffer. If so
 * it is an RX descriptor, otherwise it is a TX.
 */
static inline bool
is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
{
	void *start = iser_conn->rx_descs;
	u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
	void *end = (void *)((uintptr_t)start + (uintptr_t)len);

	if (start) {
		if (wr_id >= start && wr_id < end)
			return false;
	} else {
		return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf);
	}

	return true;
}
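
Since flush-error completions carry no reliable opcode, the driver classifies them purely by where wr_id points. A standalone sketch of the same range test on an invented descriptor array (note that, as in the driver, this leans on ordinary pointer comparison):

/* Sketch of the wr_id range classification above, using an invented
 * rx descriptor array in place of the connection's real one. */
#include <stdio.h>
#include <stdbool.h>

struct rx_desc { char pad[128]; };

static bool
is_tx(struct rx_desc *descs, int n, void *wr_id)
{
	void *start = descs;
	void *end = descs + n;

	/* inside the rx array -> rx descriptor, anything else -> tx */
	return (!(wr_id >= start && wr_id < end));
}

int
main(void)
{
	struct rx_desc descs[4];
	char elsewhere;

	printf("%d\n", is_tx(descs, 4, &descs[2]));  /* 0: rx */
	printf("%d\n", is_tx(descs, 4, &elsewhere)); /* 1: tx */
	return (0);
}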

/**
 * iser_handle_comp_error() - Handle error completion
 * @ib_conn: connection RDMA resources
 * @wc: work completion
 *
 * Notes: Update post_recv_buf_count in case of recv error completion.
 * For non-FLUSH error completion we should also notify the iscsi layer that
 * the connection has failed (in case we passed bind stage).
 */
static void
iser_handle_comp_error(struct ib_conn *ib_conn,
		       struct ib_wc *wc)
{
	void *wr_id = (void *)(uintptr_t)wc->wr_id;
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);

	if (is_iser_tx_desc(iser_conn, wr_id)) {
		ISER_DBG("conn %p got send comp error", iser_conn);
	} else {
		ISER_DBG("conn %p got recv comp error", iser_conn);
		ib_conn->post_recv_buf_count--;
	}
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
}

/**
 * iser_handle_wc - handle a single work completion
 * @wc: work completion
 *
 * Soft-IRQ context, work completion can be either
 * SEND or RECV, and can complete successfully or
 * with an error (or flush error).
 */
static void iser_handle_wc(struct ib_wc *wc)
{
	struct ib_conn *ib_conn;
	struct iser_tx_desc *tx_desc;
	struct iser_rx_desc *rx_desc;

	ib_conn = wc->qp->qp_context;
	if (likely(wc->status == IB_WC_SUCCESS)) {
		if (wc->opcode == IB_WC_RECV) {
			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
			iser_rcv_completion(rx_desc, wc->byte_len,
					    ib_conn);
		} else if (wc->opcode == IB_WC_SEND) {
			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
			iser_snd_completion(tx_desc, ib_conn);
		} else {
			ISER_ERR("Unknown wc opcode %d", wc->opcode);
		}
	} else {
		struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
							   ib_conn);
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			ISER_ERR("conn %p wr id %lx status %d vend_err %x",
				 iser_conn, wc->wr_id, wc->status, wc->vendor_err);
		} else {
			ISER_DBG("flush error: conn %p wr id %lx", iser_conn, wc->wr_id);
		}

		if (wc->wr_id == ISER_BEACON_WRID) {
			/* all flush errors were consumed */
			mtx_lock(&ib_conn->beacon.flush_lock);
			ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn);
			cv_signal(&ib_conn->beacon.flush_cv);
			mtx_unlock(&ib_conn->beacon.flush_lock);
		} else {
			iser_handle_comp_error(ib_conn, wc);
		}
	}
}

static void
iser_cq_tasklet_fn(void *data, int pending)
{
	struct iser_comp *comp = (struct iser_comp *)data;
	struct ib_cq *cq = comp->cq;
	struct ib_wc *const wcs = comp->wcs;
	int completed = 0;
	int i;
	int n;

	while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
		for (i = 0; i < n; i++)
			iser_handle_wc(&wcs[i]);

		completed += n;
		if (completed >= iser_cq_poll_limit)
			break;
	}

	/*
	 * It is assumed here that arming the CQ only once it is empty
	 * would not cause interrupts to be missed.
	 */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

static void
iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_comp *comp = cq_context;

	taskqueue_enqueue_fast(comp->tq, &comp->task);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, non-zero on failure
 */
static int
iser_create_device_ib_res(struct iser_device *device)
{
	struct ib_device_attr *dev_attr = &device->dev_attr;
	int ret, i, max_cqe;

	ret = ib_query_device(device->ib_device, dev_attr);
	if (ret) {
		ISER_ERR("Query device failed for %s", device->ib_device->name);
		return (ret);
	}

	if (!(dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
		ISER_ERR("device %s doesn't support Fastreg, "
			 "can't register memory", device->ib_device->name);
		return (1);
	}

	device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors);

	device->comps = malloc(device->comps_used * sizeof(*device->comps),
			       M_ISER_VERBS, M_WAITOK | M_ZERO);
	if (!device->comps)
		goto comps_err;

	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);

	ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d",
		 device->comps_used, device->ib_device->name,
		 device->ib_device->num_comp_vectors, max_cqe);

	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		comp->device = device;
		comp->cq = ib_create_cq(device->ib_device,
					iser_cq_callback,
					iser_cq_event_callback,
					(void *)comp,
					max_cqe, i);
		if (IS_ERR(comp->cq)) {
			comp->cq = NULL;
			goto cq_err;
		}

		if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
			goto cq_err;

		TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp);
		comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT,
						 taskqueue_thread_enqueue, &comp->tq);
		if (!comp->tq)
			goto tq_err;
		taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq");
	}

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto tq_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
			      iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return (0);

handler_err:
	ib_dereg_mr(device->mr);
tq_err:
	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];
		if (comp->tq)
			taskqueue_free(comp->tq);
	}
cq_err:
	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];
		if (comp->cq)
			ib_destroy_cq(comp->cq);
	}
	ib_dealloc_pd(device->pd);
pd_err:
	free(device->comps, M_ISER_VERBS);
comps_err:
	ISER_ERR("failed to allocate an IB resource");
	return (1);
}

/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQ and PD created with the device associated with the adapter.
 */
static void
iser_free_device_ib_res(struct iser_device *device)
{
	int i;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		taskqueue_free(comp->tq);
		ib_destroy_cq(comp->cq);
		comp->cq = NULL;
	}

	(void)ib_unregister_event_handler(&device->event_handler);
	(void)ib_dereg_mr(device->mr);
	(void)ib_dealloc_pd(device->pd);

	free(device->comps, M_ISER_VERBS);
	device->comps = NULL;

	device->mr = NULL;
	device->pd = NULL;
}

static int
iser_alloc_reg_res(struct ib_device *ib_device,
		   struct ib_pd *pd,
		   struct iser_reg_resources *res)
{
	int ret;

	res->frpl = ib_alloc_fast_reg_page_list(ib_device,
						ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(res->frpl)) {
		ret = -PTR_ERR(res->frpl);
		ISER_ERR("Failed to allocate fast reg page list err=%d", ret);
		return (ret);
	}

	res->mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(res->mr)) {
		ret = -PTR_ERR(res->mr);
		ISER_ERR("Failed to allocate fast reg mr err=%d", ret);
		goto fast_reg_mr_failure;
	}
	res->mr_valid = 1;

	return (0);

fast_reg_mr_failure:
	ib_free_fast_reg_page_list(res->frpl);

	return (ret);
}

static void
iser_free_reg_res(struct iser_reg_resources *rsc)
{
	ib_dereg_mr(rsc->mr);
	ib_free_fast_reg_page_list(rsc->frpl);
}

static struct fast_reg_descriptor *
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd)
{
	struct fast_reg_descriptor *desc;
	int ret;

	desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO);
	if (!desc) {
		ISER_ERR("Failed to allocate a new fastreg descriptor");
		return (NULL);
	}

	ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc);
	if (ret) {
		ISER_ERR("failed to allocate reg_resources");
		goto err;
	}

	return (desc);
err:
	free(desc, M_ISER_VERBS);
	return (NULL);
}

/**
 * iser_create_fastreg_pool - Creates the pool of fast registration
 * descriptors for this connection
 *
 * returns 0 on success, or errno code on failure
 */
int
iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device *device = ib_conn->device;
	struct fast_reg_descriptor *desc;
	int i;

	INIT_LIST_HEAD(&ib_conn->fastreg.pool);
	ib_conn->fastreg.pool_size = 0;
	for (i = 0; i < cmds_max; i++) {
		desc = iser_create_fastreg_desc(device->ib_device, device->pd);
		if (!desc) {
			ISER_ERR("Failed to create fastreg descriptor");
			goto err;
		}

		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		ib_conn->fastreg.pool_size++;
	}

	return (0);

err:
	iser_free_fastreg_pool(ib_conn);
	return (ENOMEM);
}

/**
 * iser_free_fastreg_pool - releases the pool of fast registration descriptors
 */
void
iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc, *tmp;
	int i = 0;

	if (list_empty(&ib_conn->fastreg.pool))
		return;

	ISER_DBG("freeing conn %p fr pool", ib_conn);

	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
		list_del(&desc->list);
		iser_free_reg_res(&desc->rsc);
		free(desc, M_ISER_VERBS);
		++i;
	}

	if (i < ib_conn->fastreg.pool_size)
		ISER_WARN("pool still has %d regions registered",
			  ib_conn->fastreg.pool_size - i);
}

/**
 * iser_create_ib_conn_res - creates the Queue-Pair (QP)
 *
 * returns 0 on success, non-zero on failure
 */
static int
iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn;
	struct iser_device *device;
	struct ib_device_attr *dev_attr;
	struct ib_qp_init_attr init_attr;
	int index, min_index = 0;
	int ret = -ENOMEM;

	iser_conn = container_of(ib_conn, struct iser_conn, ib_conn);
	device = ib_conn->device;
	dev_attr = &device->dev_attr;

	mtx_lock(&ig.connlist_mutex);
	/* select the CQ with the minimal number of usages */
	for (index = 0; index < device->comps_used; index++) {
		if (device->comps[index].active_qps <
		    device->comps[min_index].active_qps)
			min_index = index;
	}
	ib_conn->comp = &device->comps[min_index];
	ib_conn->comp->active_qps++;
	mtx_unlock(&ig.connlist_mutex);
	ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn);

	memset(&init_attr, 0, sizeof init_attr);
	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context = (void *)ib_conn;
	init_attr.send_cq = ib_conn->comp->cq;
	init_attr.recv_cq = ib_conn->comp->cq;
	init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;

	if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
		init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
		iser_conn->max_cmds =
		    ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
	} else {
		init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
		iser_conn->max_cmds =
		    ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
	}
	ISER_DBG("device %s supports max_send_wr %d",
		 device->ib_device->name, dev_attr->max_qp_wr);

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	ISER_DBG("setting conn %p cma_id %p qp %p",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->cma_id->qp);

	return (ret);

out_err:
	mtx_lock(&ig.connlist_mutex);
	ib_conn->comp->active_qps--;
	mtx_unlock(&ig.connlist_mutex);
	ISER_ERR("unable to alloc mem or create resource, err %d", ret);

	return (ret);
}

/**
 * based on the resolved device node GUID, see if there is already an
 * allocated iser device for this ib_device. If there's no such device,
 * create one.
 */
static struct iser_device *
iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;

	sx_xlock(&ig.device_list_mutex);

	list_for_each_entry(device, &ig.device_list, ig_list)
		/* find if there's a match using the node GUID */
		if (device->ib_device->node_guid == cma_id->device->node_guid)
			goto inc_refcnt;

	device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO);
	if (device == NULL)
		goto out;

	/* assign this ib_device to the iser device */
	device->ib_device = cma_id->device;
	/* init the device and link it into ig device list */
	if (iser_create_device_ib_res(device)) {
		free(device, M_ISER_VERBS);
		device = NULL;
		goto out;
	}
	list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
	device->refcount++;
	ISER_INFO("device %p refcount %d", device, device->refcount);
out:
	sx_xunlock(&ig.device_list_mutex);
	return (device);
}

/* if there's no demand for this device, release it */
static void
iser_device_try_release(struct iser_device *device)
{
	sx_xlock(&ig.device_list_mutex);
	device->refcount--;
	ISER_INFO("device %p refcount %d", device, device->refcount);
	if (!device->refcount) {
		iser_free_device_ib_res(device);
		list_del(&device->ig_list);
		free(device, M_ISER_VERBS);
		device = NULL;
	}
	sx_xunlock(&ig.device_list_mutex);
}

/**
 * Called with state mutex held
 **/
static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
				     enum iser_conn_state comp,
				     enum iser_conn_state exch)
{
	int ret;

	ret = (iser_conn->state == comp);
	if (ret)
		iser_conn->state = exch;

	return ret;
}
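
A hedged usage sketch of the helper above: because the caller holds the state mutex, the test-and-set is atomic with respect to other transitions, so exactly one caller wins the UP -> TERMINATING transition.

/* Sketch of the guarded transition the helper above provides; enum
 * values are invented, and in the driver the state mutex makes this
 * atomic. */
#include <stdio.h>

enum conn_state { CONN_UP, CONN_TERMINATING };

static int
comp_exch(enum conn_state *state, enum conn_state comp, enum conn_state exch)
{
	int ret = (*state == comp);

	if (ret)
		*state = exch;
	return (ret);
}

int
main(void)
{
	enum conn_state s = CONN_UP;

	printf("%d\n", comp_exch(&s, CONN_UP, CONN_TERMINATING)); /* 1: won */
	printf("%d\n", comp_exch(&s, CONN_UP, CONN_TERMINATING)); /* 0: done */
	return (0);
}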

/**
 * iser_free_ib_conn_res - release IB related resources
 * @iser_conn: iser connection struct
 * @destroy: indicator if we need to try to release the
 *     iser device and memory regions pool (only iscsi
 *     shutdown and DEVICE_REMOVAL will use this).
 *
 * This routine is called with the iser state mutex held
 * so the cm_id removal is out of here. It is safe to
 * be invoked multiple times.
 */
void
iser_free_ib_conn_res(struct iser_conn *iser_conn,
		      bool destroy)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	ISER_INFO("freeing conn %p cma_id %p qp %p",
		  iser_conn, ib_conn->cma_id, ib_conn->qp);

	if (ib_conn->qp != NULL) {
		mtx_lock(&ig.connlist_mutex);
		ib_conn->comp->active_qps--;
		mtx_unlock(&ig.connlist_mutex);
		rdma_destroy_qp(ib_conn->cma_id);
		ib_conn->qp = NULL;
	}

	if (destroy) {
		if (iser_conn->login_buf)
			iser_free_login_buf(iser_conn);

		if (iser_conn->rx_descs)
			iser_free_rx_descriptors(iser_conn);

		if (device != NULL) {
			iser_device_try_release(device);
			ib_conn->device = NULL;
		}
	}
}

/**
 * triggers start of the disconnect procedures and waits for them to be done
 * Called with state mutex held
 */
int
iser_conn_terminate(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct ib_send_wr *bad_send_wr;
	struct ib_recv_wr *bad_recv_wr;
	int err = 0;

	/* terminate the iser conn only if the conn state is UP */
	if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
				       ISER_CONN_TERMINATING))
		return (0);

	ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state);

	if (ib_conn->qp == NULL) {
		/* HOW can this be??? */
		ISER_WARN("qp wasn't created");
		return (1);
	}

	/*
	 * Todo: This is a temporary workaround.
	 * We serialize the connection closure using a global lock in order to
	 * receive all posted beacon completions.
	 * Without serialization, in case we open many connections (QPs) on
	 * the same CQ, we might miss beacons because of missing interrupts.
	 */
	sx_xlock(&ig.close_conns_mutex);

	/*
	 * In case we didn't already clean up the cma_id (peer initiated
	 * a disconnection), we need to cause the CMA to change the QP
	 * state to ERROR.
	 */
	if (ib_conn->cma_id) {
		err = rdma_disconnect(ib_conn->cma_id);
		if (err)
			ISER_ERR("Failed to disconnect, conn: 0x%p err %d",
				 iser_conn, err);

		mtx_lock(&ib_conn->beacon.flush_lock);
		memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr));
		ib_conn->beacon.send.wr_id = ISER_BEACON_WRID;
		ib_conn->beacon.send.opcode = IB_WR_SEND;
		/* post an indication that all send flush errors were consumed */
		err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr);
		if (err) {
			ISER_ERR("conn %p failed to post send_beacon", ib_conn);
			mtx_unlock(&ib_conn->beacon.flush_lock);
			goto out;
		}

		ISER_DBG("before send cv_wait: %p", iser_conn);
		cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
		ISER_DBG("after send cv_wait: %p", iser_conn);

		memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr));
		ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID;
		/* post an indication that all recv flush errors were consumed */
		err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr);
		if (err) {
			ISER_ERR("conn %p failed to post recv_beacon", ib_conn);
			mtx_unlock(&ib_conn->beacon.flush_lock);
			goto out;
		}

		ISER_DBG("before recv cv_wait: %p", iser_conn);
		cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
		mtx_unlock(&ib_conn->beacon.flush_lock);
		ISER_DBG("after recv cv_wait: %p", iser_conn);
	}
out:
	sx_xunlock(&ig.close_conns_mutex);
	return (1);
}
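
In isolation, the drain protocol above works like this: once rdma_disconnect() has moved the QP toward the error state, its queues flush in order, so a marker ("beacon") WR posted at the tail of each queue proves, when its flush completion arrives, that every earlier WR has already completed. A hedged outline (not compilable kernel code; locking and error handling elided):

/*
 * Outline of the beacon drain above:
 *
 *	rdma_disconnect(cma_id);	   queues begin flushing in order
 *	post send WR { wr_id = BEACON };   tail of the send queue
 *	cv_wait(flush_cv);		   iser_handle_wc() signals on BEACON
 *	post recv WR { wr_id = BEACON };   tail of the recv queue
 *	cv_wait(flush_cv);		   all rx flush errors now consumed
 */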

/**
 * Called with state mutex held
 **/
static void
iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn;

	iser_conn = cma_id->context;

	ISER_ERR("conn %p", iser_conn);

	iser_conn->state = ISER_CONN_TERMINATING;

	cv_signal(&iser_conn->up_cv);
}

/**
 * Called with state mutex held
 **/
static void
iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn *iser_conn;
	struct ib_conn *ib_conn;
	int ret;

	iser_conn = cma_id->context;

	ib_conn = &iser_conn->ib_conn;
	device = iser_device_find_by_ib_device(cma_id);
	if (!device) {
		ISER_ERR("conn %p device lookup/creation failed",
			 iser_conn);
		iser_connect_error(cma_id);
		return;
	}

	ib_conn->device = device;

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret);
		iser_connect_error(cma_id);
		return;
	}
}

/**
 * Called with state mutex held
 **/
static void
iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int ret;
	struct iser_cm_hdr req_hdr;
	struct iser_conn *iser_conn = cma_id->context;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	ret = iser_create_ib_conn_res(ib_conn);
	if (ret)
		goto failure;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
	conn_param.retry_count = 7;
	conn_param.rnr_retry_count = 6;
	/*
	 * Initiator depth should not be set, but in order to stay
	 * compatible with old targets, we keep this value set.
	 */
	conn_param.initiator_depth = 1;

	memset(&req_hdr, 0, sizeof(req_hdr));
	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
			 ISER_SEND_W_INV_NOT_SUPPORTED);
	conn_param.private_data = (void *)&req_hdr;
	conn_param.private_data_len = sizeof(struct iser_cm_hdr);

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		ISER_ERR("conn %p failure connecting: %d", iser_conn, ret);
		goto failure;
	}

	return;
failure:
	iser_connect_error(cma_id);
}

/**
 * Called with state mutex held
 **/
static void
iser_connected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	iser_conn = cma_id->context;

	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);

	ISER_INFO("remote qpn:%x my qpn:%x",
		  attr.dest_qp_num, cma_id->qp->qp_num);

	iser_conn->state = ISER_CONN_UP;

	cv_signal(&iser_conn->up_cv);
}

/**
 * Called with state mutex held
 **/
static void
iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy)
{
	struct iser_conn *iser_conn = cma_id->context;

	if (iser_conn_terminate(iser_conn))
		iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
}

int
iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	struct iser_conn *iser_conn;
	int ret = 0;

	iser_conn = cma_id->context;
	ISER_INFO("event %d status %d conn %p id %p",
		  event->event, event->status, cma_id->context, cma_id);

	sx_xlock(&iser_conn->state_mutex);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		iser_cleanup_handler(cma_id, false);
		break;
	default:
		ISER_ERR("Unexpected RDMA CM event (%d)", event->event);
		break;
	}
	sx_xunlock(&iser_conn->state_mutex);

	return (ret);
}

int
iser_post_recvl(struct iser_conn *iser_conn)
{
	struct ib_recv_wr rx_wr, *rx_wr_failed;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct ib_sge sge;
	int ib_ret;

	sge.addr = iser_conn->login_resp_dma;
	sge.length = ISER_RX_LOGIN_SIZE;
	sge.lkey = ib_conn->device->mr->lkey;

	rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
	rx_wr.sg_list = &sge;
	rx_wr.num_sge = 1;
	rx_wr.next = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
	if (ib_ret) {
		ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
		ib_conn->post_recv_buf_count--;
	}

	return (ib_ret);
}

int
iser_post_recvm(struct iser_conn *iser_conn, int count)
{
	struct ib_recv_wr *rx_wr, *rx_wr_failed;
	int i, ib_ret;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	unsigned int my_rx_head = iser_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;

	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
		rx_desc = &iser_conn->rx_descs[my_rx_head];
		rx_wr->wr_id = (uintptr_t)rx_desc;
		rx_wr->sg_list = &rx_desc->rx_sg;
		rx_wr->num_sge = 1;
		rx_wr->next = rx_wr + 1;
		my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos;
	}

	rx_wr--;
	rx_wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
	if (ib_ret) {
		ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		iser_conn->rx_desc_head = my_rx_head;

	return (ib_ret);
}
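
A standalone sketch of the ring arithmetic above: the head index walks the descriptor array modulo the queue depth, the WR array is chained by next pointers with the final entry NULL-terminated so a single post covers the whole batch, and the head is committed only on success (sizes invented):

/* Sketch of the circular rx ring indexing above, with an invented
 * queue depth and batch size. */
#include <stdio.h>

#define QP_MAX_RECV_DTOS	8

int
main(void)
{
	unsigned int head = 6;	/* pretend 6 descriptors already consumed */
	int i, count = 4;

	for (i = 0; i < count; i++) {
		printf("post rx_descs[%u]%s\n", head,
		    i == count - 1 ? "  (next = NULL ends the chain)" : "");
		head = (head + 1) % QP_MAX_RECV_DTOS;	/* wraps: 6,7,0,1 */
	}
	/* committed back to rx_desc_head only if the post succeeded */
	printf("new head = %u\n", head);
	return (0);
}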

/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, non-zero on failure
 */
int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
		   bool signal)
{
	int ib_ret;
	struct ib_send_wr send_wr, *send_wr_failed;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      tx_desc->dma_addr, ISER_HEADERS_LEN,
				      DMA_TO_DEVICE);

	send_wr.next = NULL;
	send_wr.wr_id = (uintptr_t)tx_desc;
	send_wr.sg_list = tx_desc->tx_sg;
	send_wr.num_sge = tx_desc->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;

	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
	if (ib_ret)
		ISER_ERR("ib_post_send failed, ret:%d", ib_ret);

	return (ib_ret);
}
32 sys/modules/iser/Makefile Normal file
@ -0,0 +1,32 @@
# $FreeBSD$

.PATH: ${.CURDIR}/../../dev/iser/

.include <bsd.own.mk>

KMOD= iser

SRCS= icl_iser.c
SRCS+= iser_initiator.c
SRCS+= iser_memory.c
SRCS+= iser_verbs.c
SRCS+= vnode_if.h
SRCS+= opt_inet.h
SRCS+= opt_inet6.h
SRCS+= opt_cam.h
SRCS+= bus_if.h
SRCS+= device_if.h
SRCS+= icl_conn_if.h

CFLAGS+= -I${.CURDIR}/../../
CFLAGS+= -I${SYSDIR}/ofed/include
CFLAGS+= -I${SYSDIR}/compat/linuxkpi/common/include
CFLAGS+= -DCONFIG_INFINIBAND_USER_MEM
CFLAGS+= -DINET6 -DINET
CFLAGS+= -fms-extensions

CFLAGS+=-DICL_KERNEL_PROXY

MFILES= kern/bus_if.m kern/device_if.m dev/iscsi/icl_conn_if.m

.include <bsd.kmod.mk>
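
Assuming the standard FreeBSD kernel module workflow (an assumption here, since the commit message notes the code is not yet connected to the build), building this directory would yield iser.ko, loadable with kldload(8).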