Initial drop of the hardware accelerated iSCSI driver.

Submitted by:	Sreenivasa Honnur <shonnur at chelsio dot com>
Sponsored by:	Chelsio Communications
Navdeep Parhar 2015-02-10 23:48:06 +00:00
parent 2706e8723d
commit b4943e97bb
14 changed files with 3761 additions and 8 deletions


@@ -0,0 +1,74 @@
Chelsio iSCSI driver (cxgbei):
==============================
To enable offload support on Chelsio hardware, add the lines below to /boot/loader.conf:
# cat /boot/loader.conf
hw.cxgbe.config_file="uwire"
hw.cxgbe.iscsicaps_allowed=0xf
To load the cxgbe driver and enable TOE on the interfaces, add the lines below to /etc/rc.conf:
# cat /etc/rc.conf
kld_list="if_cxgbe t4_tom"
ifconfig_cxl0="inet 15.1.1.154"
ifconfig_cxl1="inet 16.1.1.154"
ifconfig cxl0 toe
ifconfig cxl1 toe
Compilation Steps:
1. Compile the iscsi and ctl drivers
cd sys/modules/iscsi
make && make install
cd sys/modules/ctl
make && make install
2. Compile ctld
cd usr.sbin/ctld
make all install
3. Compile iscsictl & iscsid
cd /home/chelsio/usr.sbin/iscsid
make all install
cd /home/chelsio/usr.bin/iscsictl
make all install
4. Compile the offload module
cd sys/modules/cxgbe/cxgbei
make && make install
Sample /etc/iscsi.conf
======================
t0 {
TargetAddress = 15.1.1.33
TargetName = iqn.2008-09.com.example:server.target1
HeaderDigest = CRC32C
DataDigest = CRC32C
FirstBurstLength = 8192
}
Sample /etc/ctl.conf
====================
portal-group pg0 {
discovery-auth-group no-authentication
listen 0.0.0.0
}
target iqn.2012-06.com.example:target0 {
auth-group no-authentication
portal-group pg0
lun 0 {
path /tmp/file1
size 16M
}
}
Loading iscsi-initiator:
1. service iscsid onestart
2. kldload cxgbei
3. iscsictl -aA
Loading iscsi-target:
1. service ctld onestart
2. kldload cxgbei
3. Log in from the initiator.


@@ -0,0 +1,14 @@
portal-group pg0 {
discovery-auth-group no-authentication
listen 0.0.0.0
}
target iqn.2012-06.com.example:target0 {
auth-group no-authentication
portal-group pg0
lun 0 {
path /tmp/file1
size 16M
}
}

File diff suppressed because it is too large.


@@ -0,0 +1,158 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Chelsio T5xx iSCSI driver
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef __CXGBEI_OFLD_H__
#define __CXGBEI_OFLD_H__
#include "mbufq.h"
typedef struct iscsi_socket {
/* iscsi private */
unsigned char s_flag;
unsigned char s_cpuno; /* bind to cpuno */
unsigned char s_mode; /* offload mode */
unsigned char s_txhold;
unsigned char s_ddp_pgidx; /* ddp page selection */
unsigned char s_hcrc_len;
unsigned char s_dcrc_len;
unsigned char filler[1];
unsigned int s_tid; /* for debug only */
unsigned int s_tmax;
unsigned int s_rmax;
unsigned int s_mss;
void *s_odev; /* offload device, if any */
void *s_appdata; /* upperlayer data pointer */
void *s_private; /* underlying socket related info. */
void *s_conn; /* ic_conn pointer */
struct socket *sock;
struct mbuf_head iscsi_rcv_mbufq;/* rx - ULP mbufs */
struct mbuf_head ulp2_writeq; /* tx - ULP mbufs */
struct mbuf_head ulp2_wrq; /* tx wr- ULP mbufs */
struct mbuf *mbuf_ulp_lhdr;
struct mbuf *mbuf_ulp_ldata;
} iscsi_socket;
#define ISCSI_SG_SBUF_DMABLE 0x1
#define ISCSI_SG_SBUF_DMA_ONLY 0x2 /*private*/
#define ISCSI_SG_BUF_ALLOC 0x10
#define ISCSI_SG_PAGE_ALLOC 0x20
#define ISCSI_SG_SBUF_MAP_NEEDED 0x40
#define ISCSI_SG_SBUF_MAPPED 0x80
#define ISCSI_SG_SBUF_LISTHEAD 0x100
#define ISCSI_SG_SBUF_LISTTAIL 0x200
#define ISCSI_SG_SBUF_XFER_DONE 0x400
typedef struct cxgbei_sgl {
int sg_flag;
void *sg_addr;
void *sg_dma_addr;
size_t sg_offset;
size_t sg_length;
} cxgbei_sgl;
#define cxgbei_scsi_for_each_sg(_sgl, _sgel, _n, _i) \
for (_i = 0, _sgel = (cxgbei_sgl*) (_sgl); _i < _n; _i++, \
_sgel++)
#define sg_dma_addr(_sgel) _sgel->sg_dma_addr
#define sg_virt(_sgel) _sgel->sg_addr
#define sg_len(_sgel) _sgel->sg_length
#define sg_off(_sgel) _sgel->sg_offset
#define sg_next(_sgel) _sgel + 1
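/*
 * Standalone illustration (not part of this header): a minimal userspace
 * sketch of how the accessor macros above walk a cxgbei_sgl array. The
 * struct and macro definitions are repeated here so the sketch compiles
 * on its own; the two-entry list is made-up example data.
 */
#include <stdio.h>
#include <stddef.h>

typedef struct cxgbei_sgl {
	int	sg_flag;
	void	*sg_addr;
	void	*sg_dma_addr;
	size_t	sg_offset;
	size_t	sg_length;
} cxgbei_sgl;

#define cxgbei_scsi_for_each_sg(_sgl, _sgel, _n, _i)	\
	for (_i = 0, _sgel = (cxgbei_sgl *)(_sgl); _i < _n; _i++, _sgel++)
#define sg_len(_sgel)	_sgel->sg_length

int
main(void)
{
	char buf0[4096], buf1[2048];
	cxgbei_sgl sgl[2] = {
		{ .sg_addr = buf0, .sg_length = sizeof(buf0) },
		{ .sg_addr = buf1, .sg_length = sizeof(buf1) },
	};
	cxgbei_sgl *sg;
	size_t total = 0;
	int i;

	cxgbei_scsi_for_each_sg(sgl, sg, 2, i)
		total += sg_len(sg);
	printf("%zu bytes in 2 segments\n", total);	/* prints 6144 */
	return (0);
}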
#define SBUF_ULP_FLAG_HDR_RCVD 0x1
#define SBUF_ULP_FLAG_DATA_RCVD 0x2
#define SBUF_ULP_FLAG_STATUS_RCVD 0x4
#define SBUF_ULP_FLAG_COALESCE_OFF 0x8
#define SBUF_ULP_FLAG_HCRC_ERROR 0x10
#define SBUF_ULP_FLAG_DCRC_ERROR 0x20
#define SBUF_ULP_FLAG_PAD_ERROR 0x40
#define SBUF_ULP_FLAG_DATA_DDPED 0x80
/* Flags for return value of CPL message handlers */
enum {
CPL_RET_BUF_DONE = 1, /* buffer processing done, buffer may be freed */
CPL_RET_BAD_MSG = 2, /* bad CPL message (e.g., unknown opcode) */
CPL_RET_UNKNOWN_TID = 4 /* unexpected unknown TID */
};
/*
* Similar to tcp_skb_cb but with ULP elements added to support DDP, iSCSI,
* etc.
*/
struct ulp_mbuf_cb {
uint8_t ulp_mode; /* ULP mode/submode of sk_buff */
uint8_t flags; /* TCP-like flags */
uint32_t seq; /* TCP sequence number */
union { /* ULP-specific fields */
struct {
uint32_t ddigest; /* ULP rx_data_ddp selected field*/
uint32_t pdulen; /* ULP rx_data_ddp selected field*/
} iscsi;
struct {
uint32_t offset; /* ULP DDP offset notification */
uint8_t flags; /* ULP DDP flags ... */
} ddp;
} ulp;
uint8_t ulp_data[16]; /* scratch area for ULP */
void *pdu; /* pdu pointer */
};
/* private data for each scsi task */
typedef struct cxgbei_task_data {
cxgbei_sgl sgl[256];
unsigned int nsge;
unsigned int sc_ddp_tag;
} cxgbei_task_data;
static unsigned char t4tom_cpl_handler_register_flag;
enum {
TOM_CPL_ISCSI_HDR_REGISTERED_BIT,
TOM_CPL_SET_TCB_RPL_REGISTERED_BIT,
TOM_CPL_RX_DATA_DDP_REGISTERED_BIT
};
#define ODEV_FLAG_ULP_CRC_ENABLED 0x1
#define ODEV_FLAG_ULP_DDP_ENABLED 0x2
#define ODEV_FLAG_ULP_TX_ALLOC_DIGEST 0x4
#define ODEV_FLAG_ULP_RX_PAD_INCLUDED 0x8
#define ODEV_FLAG_ULP_ENABLED \
(ODEV_FLAG_ULP_CRC_ENABLED | ODEV_FLAG_ULP_DDP_ENABLED)
struct ulp_mbuf_cb * get_ulp_mbuf_cb(struct mbuf *);
int cxgbei_conn_set_ulp_mode(struct socket *, void *);
int cxgbei_conn_close(struct socket *);
void cxgbei_conn_task_reserve_itt(void *, void **, void *, unsigned int *);
void cxgbei_conn_transfer_reserve_ttt(void *, void **, void *, unsigned int *);
void cxgbei_cleanup_task(void *, void *);
int cxgbei_conn_xmit_pdu(void *, void *);
#endif


@@ -0,0 +1,698 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Chelsio T5xx iSCSI driver
* cxgbei_ulp2_ddp.c: Chelsio iSCSI DDP Manager.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_inet.h"
#include <sys/types.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/toecore.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <common/common.h>
#include <common/t4_msg.h>
#include <common/t4_regs.h> /* for PCIE_MEM_ACCESS */
#include <tom/t4_tom.h>
#include "cxgbei.h"
#include "cxgbei_ulp2_ddp.h"
static inline int
cxgbei_counter_dec_and_read(volatile int *p)
{
	/*
	 * Decrement and read back in a single atomic step;
	 * atomic_fetchadd_int returns the pre-decrement value.
	 */
	return (atomic_fetchadd_int((volatile u_int *)p, -1) - 1);
}
static inline int
get_order(unsigned long size)
{
int order;
size = (size - 1) >> PAGE_SHIFT;
order = 0;
while (size) {
order++;
size >>= 1;
}
return (order);
}
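/*
 * Standalone illustration (not part of the driver): get_order(size) above
 * computes the smallest 'order' such that (PAGE_SIZE << order) >= size.
 * This sketch assumes 4KB pages (PAGE_SHIFT == 12).
 */
#include <assert.h>

#define PAGE_SHIFT	12

static int
get_order(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return (order);
}

int
main(void)
{
	assert(get_order(4096) == 0);	/* exactly one page */
	assert(get_order(4097) == 1);	/* just over one page: two pages */
	assert(get_order(16384) == 2);	/* 16KB: four pages */
	assert(get_order(65536) == 4);	/* 64KB: sixteen pages */
	return (0);
}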
/*
* Map a single buffer address.
*/
static void
ulp2_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
bus_addr_t *ba = arg;
if (error)
return;
KASSERT(nseg == 1, ("%s: %d segments returned!", __func__, nseg));
*ba = segs->ds_addr;
}
static int
ulp2_dma_tag_create(struct cxgbei_ulp2_ddp_info *ddp)
{
int rc;
rc = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR,
BUS_SPACE_MAXADDR, NULL, NULL, UINT32_MAX , 8,
BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL,
&ddp->ulp_ddp_tag);
if (rc != 0) {
printf("%s(%d): bus_dma_tag_create() "
"failed (rc = %d)!\n",
__FILE__, __LINE__, rc);
return rc;
}
return 0;
}
/*
* iSCSI Direct Data Placement
*
* T4/5 ulp2 h/w can directly place the iSCSI Data-In or Data-Out PDU's
* payload into pre-posted final destination host-memory buffers based on the
* Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) in Data-Out
* PDUs.
*
* The host memory address is programmed into the h/w in the form of pagepod
* entries. The location of the pagepod entry is encoded into the ddp tag,
* which is used as (or as the base of) the ITT/TTT.
*/
unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4};
unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16};
unsigned char page_idx = DDP_PGIDX_MAX;
static inline int
ddp_find_unused_entries(struct cxgbei_ulp2_ddp_info *ddp,
unsigned int start, unsigned int max,
unsigned int count, unsigned int *idx,
struct cxgbei_ulp2_gather_list *gl)
{
unsigned int i, j, k;
/* not enough entries */
if ((max - start) < count)
return EBUSY;
max -= count;
mtx_lock(&ddp->map_lock);
for (i = start; i < max;) {
for (j = 0, k = i; j < count; j++, k++) {
if (ddp->gl_map[k])
break;
}
if (j == count) {
for (j = 0, k = i; j < count; j++, k++)
ddp->gl_map[k] = gl;
mtx_unlock(&ddp->map_lock);
*idx = i;
return 0;
}
i += j + 1;
}
mtx_unlock(&ddp->map_lock);
return EBUSY;
}
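/*
 * Standalone illustration (not part of the driver): the same first-fit
 * scan as ddp_find_unused_entries(), over a plain pointer array and with
 * the locking left out. It looks for 'count' consecutive free (NULL)
 * slots, claims them, and returns the start index, or -1 on failure.
 */
#include <stdio.h>

static int
find_unused(void **map, unsigned int start, unsigned int max,
    unsigned int count, void *owner)
{
	unsigned int i, j, k;

	if (max - start < count)
		return (-1);
	max -= count;
	for (i = start; i < max;) {
		for (j = 0, k = i; j < count; j++, k++)
			if (map[k] != NULL)
				break;			/* run broken */
		if (j == count) {
			for (j = 0, k = i; j < count; j++, k++)
				map[k] = owner;		/* claim the run */
			return ((int)i);
		}
		i += j + 1;	/* restart just past the occupied slot */
	}
	return (-1);
}

int
main(void)
{
	void *map[8] = { NULL };
	int owner;

	map[2] = &owner;	/* slot 2 already in use */
	printf("3 free slots at index %d\n",
	    find_unused(map, 0, 8, 3, &owner));	/* prints 3 */
	return (0);
}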
static inline void
ddp_unmark_entries(struct cxgbei_ulp2_ddp_info *ddp,
int start, int count)
{
mtx_lock(&ddp->map_lock);
memset(&ddp->gl_map[start], 0,
count * sizeof(struct cxgbei_ulp2_gather_list *));
mtx_unlock(&ddp->map_lock);
}
/**
* cxgbei_ulp2_ddp_find_page_index - return ddp page index for a given page size
* @pgsz: page size
* return the ddp page index, if no match is found return DDP_PGIDX_MAX.
*/
int
cxgbei_ulp2_ddp_find_page_index(unsigned long pgsz)
{
int i;
for (i = 0; i < DDP_PGIDX_MAX; i++) {
if (pgsz == (1UL << ddp_page_shift[i]))
return i;
}
CTR1(KTR_CXGBE, "ddp page size 0x%lx not supported.\n", pgsz);
return DDP_PGIDX_MAX;
}
static int
cxgbei_ulp2_ddp_adjust_page_table(void)
{
int i;
unsigned int base_order, order;
if (PAGE_SIZE < (1UL << ddp_page_shift[0])) {
CTR2(KTR_CXGBE, "PAGE_SIZE %u too small, min. %lu.\n",
PAGE_SIZE, 1UL << ddp_page_shift[0]);
return EINVAL;
}
base_order = get_order(1UL << ddp_page_shift[0]);
order = get_order(1 << PAGE_SHIFT);
for (i = 0; i < DDP_PGIDX_MAX; i++) {
/* first is the kernel page size, then just doubling the size */
ddp_page_order[i] = order - base_order + i;
ddp_page_shift[i] = PAGE_SHIFT + i;
}
return 0;
}
static inline void
ddp_gl_unmap(struct toedev *tdev,
struct cxgbei_ulp2_gather_list *gl)
{
int i;
struct adapter *sc = tdev->tod_softc;
struct cxgbei_ulp2_ddp_info *ddp = sc->iscsi_softc;
if (!gl->pages[0])
return;
for (i = 0; i < gl->nelem; i++) {
bus_dmamap_unload(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map);
bus_dmamap_destroy(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map);
}
}
static inline int
ddp_gl_map(struct toedev *tdev,
struct cxgbei_ulp2_gather_list *gl)
{
int i, rc;
bus_addr_t pa;
struct cxgbei_ulp2_ddp_info *ddp;
struct adapter *sc = tdev->tod_softc;
ddp = (struct cxgbei_ulp2_ddp_info *)sc->iscsi_softc;
if (ddp == NULL) {
printf("%s: DDP is NULL tdev:%p sc:%p ddp:%p\n",
__func__, tdev, sc, ddp);
return ENOMEM;
}
mtx_lock(&ddp->map_lock);
for (i = 0; i < gl->nelem; i++) {
rc = bus_dmamap_create(ddp->ulp_ddp_tag, 0,
&gl->dma_sg[i].bus_map);
if (rc != 0) {
printf("%s: unable to map page 0x%p.\n",
__func__, gl->pages[i]);
goto unmap;
}
rc = bus_dmamap_load(ddp->ulp_ddp_tag, gl->dma_sg[i].bus_map,
gl->pages[i], PAGE_SIZE, ulp2_dma_map_addr,
&pa, BUS_DMA_NOWAIT);
if (rc != 0) {
printf("%s:unable to load page 0x%p.\n",
__func__, gl->pages[i]);
goto unmap;
}
gl->dma_sg[i].phys_addr = pa;
}
mtx_unlock(&ddp->map_lock);
return 0;
unmap:
if (i) {
unsigned int nelem = gl->nelem;
gl->nelem = i;
ddp_gl_unmap(tdev, gl);
gl->nelem = nelem;
}
return ENOMEM;
}
/**
* cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec - build ddp page buffer list
* @xferlen: total buffer length
* @sgl: page buffer scatter-gather list (struct cxgbei_sgl)
* @sgcnt: # of page buffers
* @gfp: allocation mode
*
* construct a ddp page buffer list from the scsi scatter-gather list.
* coalesce buffers as much as possible, and obtain dma addresses for
* each page.
*
* Return the cxgbei_ulp2_gather_list constructed from the page buffers if the
* memory can be used for ddp. Return NULL otherwise.
*/
struct cxgbei_ulp2_gather_list *
cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(unsigned int xferlen, cxgbei_sgl *sgl,
    unsigned int sgcnt, void *tdev, int gfp)
{
struct cxgbei_ulp2_gather_list *gl;
cxgbei_sgl *sg = sgl;
void *sgpage = (void *)((u64)sg->sg_addr & (~PAGE_MASK));
unsigned int sglen = sg->sg_length;
unsigned int sgoffset = (u64)sg->sg_addr & PAGE_MASK;
unsigned int npages = (xferlen + sgoffset + PAGE_SIZE - 1) >>
PAGE_SHIFT;
int i = 1, j = 0;
if (xferlen <= DDP_THRESHOLD) {
CTR2(KTR_CXGBE, "xfer %u < threshold %u, no ddp.\n",
xferlen, DDP_THRESHOLD);
return NULL;
}
gl = malloc(sizeof(struct cxgbei_ulp2_gather_list) +
npages * (sizeof(struct dma_segments) + sizeof(void *)),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (gl == NULL) {
printf("%s: gl alloc failed\n", __func__);
return NULL;
}
gl->pages = (void **)&gl->dma_sg[npages];
gl->length = xferlen;
gl->offset = sgoffset;
gl->pages[0] = sgpage;
CTR6(KTR_CXGBE,
"%s: xferlen:0x%x len:0x%x off:0x%x sg_addr:%p npages:%d\n",
__func__, xferlen, gl->length, gl->offset, sg->sg_addr, npages);
for (i = 1, sg = sg_next(sg); i < sgcnt; i++, sg = sg_next(sg)) {
void *page = sg->sg_addr;
if (sgpage == page && sg->sg_offset == sgoffset + sglen)
sglen += sg->sg_length;
else {
/* make sure the sgl is fit for ddp:
* each has the same page size, and
* all of the middle pages are used completely
*/
if ((j && sgoffset) ||
((i != sgcnt - 1) &&
((sglen + sgoffset) & ~CXGBEI_PAGE_MASK))){
goto error_out;
}
j++;
if (j == gl->nelem || sg->sg_offset) {
goto error_out;
}
gl->pages[j] = page;
sglen = sg->sg_length;
sgoffset = sg->sg_offset;
sgpage = page;
}
}
gl->nelem = ++j;
if (ddp_gl_map(tdev, gl) < 0)
goto error_out;
return gl;
error_out:
free(gl, M_DEVBUF);
return NULL;
}
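/*
 * Standalone illustration (not part of the driver): the suitability rule
 * enforced by the loop above, once virtually contiguous entries have been
 * coalesced into chunks. Only the first chunk may start at a page offset
 * and only the last may end short; every interior boundary must fall on a
 * page boundary. 4KB pages are assumed here.
 */
#include <stdbool.h>
#include <stdint.h>

#define PGSZ	4096UL
#define PGMASK	(PGSZ - 1)

struct chunk {
	uintptr_t addr;
	size_t len;
};

static bool
ddp_suitable(const struct chunk *c, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (i > 0 && (c[i].addr & PGMASK) != 0)
			return (false);	/* interior chunk starts mid-page */
		if (i < n - 1 && ((c[i].addr + c[i].len) & PGMASK) != 0)
			return (false);	/* interior chunk ends mid-page */
	}
	return (true);
}

int
main(void)
{
	struct chunk ok[2]  = {{ 0x1200, 3584 }, { 0x2000, 4096 }};
	struct chunk bad[2] = {{ 0x1200, 1024 }, { 0x3000, 4096 }};

	return (ddp_suitable(ok, 2) && !ddp_suitable(bad, 2) ? 0 : 1);
}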
/**
* cxgbei_ulp2_ddp_release_gl - release a page buffer list
* @gl: a ddp page buffer list
* @tdev: offload device used for unmapping
* free a ddp page buffer list built by cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec().
*/
void
cxgbei_ulp2_ddp_release_gl(struct cxgbei_ulp2_gather_list *gl, void *tdev)
{
ddp_gl_unmap(tdev, gl);
free(gl, M_DEVBUF);
}
/**
* cxgbei_ulp2_ddp_tag_reserve - set up ddp for a data transfer
* @ddp: adapter's ddp info
* @tid: connection id
* @tformat: tag format
* @tagp: contains s/w tag initially, will be updated with ddp/hw tag
* @gl: the page memory list
* @gfp: allocation mode
*
* ddp setup for a given page buffer list and construct the ddp tag.
* return 0 on success, an errno otherwise.
*/
int
cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_ulp2_ddp_info *ddp,
void *isock, unsigned int tid,
struct cxgbei_ulp2_tag_format *tformat,
u32 *tagp, struct cxgbei_ulp2_gather_list *gl,
int gfp, int reply)
{
struct cxgbei_ulp2_pagepod_hdr hdr;
unsigned int npods, idx;
int rv;
u32 sw_tag = *tagp;
u32 tag;
if (page_idx >= DDP_PGIDX_MAX || !ddp || !gl || !gl->nelem ||
gl->length < DDP_THRESHOLD) {
CTR3(KTR_CXGBE, "pgidx %u, xfer %u/%u, NO ddp.\n",
page_idx, gl->length, DDP_THRESHOLD);
return EINVAL;
}
npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT;
if (ddp->idx_last == ddp->nppods)
rv = ddp_find_unused_entries(ddp, 0, ddp->nppods,
npods, &idx, gl);
else {
rv = ddp_find_unused_entries(ddp, ddp->idx_last + 1,
ddp->nppods, npods, &idx, gl);
if (rv && ddp->idx_last >= npods) {
rv = ddp_find_unused_entries(ddp, 0,
min(ddp->idx_last + npods, ddp->nppods),
npods, &idx, gl);
}
}
if (rv) {
CTR3(KTR_CXGBE, "xferlen %u, gl %u, npods %u NO DDP.\n",
gl->length, gl->nelem, npods);
return rv;
}
tag = cxgbei_ulp2_ddp_tag_base(idx, ddp, tformat, sw_tag);
CTR4(KTR_CXGBE, "%s: sw_tag:0x%x idx:0x%x tag:0x%x\n",
__func__, sw_tag, idx, tag);
hdr.rsvd = 0;
hdr.vld_tid = htonl(F_IPPOD_VALID | V_IPPOD_TID(tid));
hdr.pgsz_tag_clr = htonl(tag & ddp->rsvd_tag_mask);
hdr.maxoffset = htonl(gl->length);
hdr.pgoffset = htonl(gl->offset);
rv = ddp->ddp_set_map(ddp, isock, &hdr, idx, npods, gl, reply);
if (rv < 0)
goto unmark_entries;
ddp->idx_last = idx;
*tagp = tag;
return 0;
unmark_entries:
ddp_unmark_entries(ddp, idx, npods);
return rv;
}
/**
* cxgbei_ulp2_ddp_tag_release - release a ddp tag
* @ddp: adapter's ddp info
* @tag: ddp tag
* ddp cleanup for a given ddp tag and release all the resources held
*/
void
cxgbei_ulp2_ddp_tag_release(struct cxgbei_ulp2_ddp_info *ddp, u32 tag,
iscsi_socket *isock)
{
u32 idx;
if (ddp == NULL) {
CTR2(KTR_CXGBE, "%s:release ddp tag 0x%x, ddp NULL.\n",
__func__, tag);
return;
}
if (isock == NULL)
return;
idx = (tag >> IPPOD_IDX_SHIFT) & ddp->idx_mask;
CTR3(KTR_CXGBE, "tag:0x%x idx:0x%x nppods:0x%x\n",
tag, idx, ddp->nppods);
if (idx < ddp->nppods) {
struct cxgbei_ulp2_gather_list *gl = ddp->gl_map[idx];
unsigned int npods;
if (!gl || !gl->nelem) {
CTR4(KTR_CXGBE,
"release 0x%x, idx 0x%x, gl 0x%p, %u.\n",
tag, idx, gl, gl ? gl->nelem : 0);
return;
}
npods = (gl->nelem + IPPOD_PAGES_MAX - 1) >> IPPOD_PAGES_SHIFT;
CTR3(KTR_CXGBE, "ddp tag 0x%x, release idx 0x%x, npods %u.\n",
tag, idx, npods);
ddp->ddp_clear_map(ddp, gl, tag, idx, npods, isock);
ddp_unmark_entries(ddp, idx, npods);
cxgbei_ulp2_ddp_release_gl(gl, ddp->tdev);
} else
CTR3(KTR_CXGBE, "ddp tag 0x%x, idx 0x%x > max 0x%x.\n",
tag, idx, ddp->nppods);
}
/**
* cxgbei_ulp2_adapter_ddp_info - read the adapter's ddp information
* @ddp: adapter's ddp info
* @tformat: tag format
* @txsz: max tx pdu payload size, filled in by this func.
* @rxsz: max rx pdu payload size, filled in by this func.
* setup the tag format for a given iscsi entity
*/
int
cxgbei_ulp2_adapter_ddp_info(struct cxgbei_ulp2_ddp_info *ddp,
struct cxgbei_ulp2_tag_format *tformat,
unsigned int *txsz, unsigned int *rxsz)
{
unsigned char idx_bits;
if (tformat == NULL)
return EINVAL;
if (ddp == NULL)
return EINVAL;
idx_bits = 32 - tformat->sw_bits;
tformat->sw_bits = ddp->idx_bits;
tformat->rsvd_bits = ddp->idx_bits;
tformat->rsvd_shift = IPPOD_IDX_SHIFT;
tformat->rsvd_mask = (1 << tformat->rsvd_bits) - 1;
CTR4(KTR_CXGBE, "tag format: sw %u, rsvd %u,%u, mask 0x%x.\n",
tformat->sw_bits, tformat->rsvd_bits,
tformat->rsvd_shift, tformat->rsvd_mask);
*txsz = min(ULP2_MAX_PDU_PAYLOAD,
ddp->max_txsz - ISCSI_PDU_NONPAYLOAD_LEN);
*rxsz = min(ULP2_MAX_PDU_PAYLOAD,
ddp->max_rxsz - ISCSI_PDU_NONPAYLOAD_LEN);
CTR4(KTR_CXGBE, "max payload size: %u/%u, %u/%u.\n",
*txsz, ddp->max_txsz, *rxsz, ddp->max_rxsz);
return 0;
}
/**
* cxgbei_ulp2_ddp_cleanup - release the cxgbX adapter's ddp resource
* @tdev: t4cdev adapter
* release all the resource held by the ddp pagepod manager for a given
* adapter if needed
*/
void
cxgbei_ulp2_ddp_cleanup(struct cxgbei_ulp2_ddp_info **ddp_pp)
{
int i = 0;
struct cxgbei_ulp2_ddp_info *ddp = *ddp_pp;
if (ddp == NULL)
return;
CTR2(KTR_CXGBE, "tdev, release ddp 0x%p, ref %d.\n",
ddp, atomic_load_acq_int(&ddp->refcnt));
if (ddp && (cxgbei_counter_dec_and_read(&ddp->refcnt) == 0)) {
*ddp_pp = NULL;
while (i < ddp->nppods) {
struct cxgbei_ulp2_gather_list *gl = ddp->gl_map[i];
if (gl) {
int npods = (gl->nelem + IPPOD_PAGES_MAX - 1)
>> IPPOD_PAGES_SHIFT;
CTR2(KTR_CXGBE,
"tdev, ddp %d + %d.\n", i, npods);
free(gl, M_DEVBUF);
i += npods;
} else
i++;
}
bus_dmamap_unload(ddp->ulp_ddp_tag, ddp->ulp_ddp_map);
cxgbei_ulp2_free_big_mem(ddp);
}
}
/**
* ddp_init - initialize the adapter's ddp resource
* @tdev: offload device
* @ddp_pp: adapter's ddp info
* @uinfo: adapter's iscsi info
* initialize the ddp pagepod manager for a given adapter
*/
static void
ddp_init(void *tdev,
struct cxgbei_ulp2_ddp_info **ddp_pp,
struct ulp_iscsi_info *uinfo)
{
struct cxgbei_ulp2_ddp_info *ddp = *ddp_pp;
unsigned int ppmax, bits;
int i, rc;
if (uinfo->ulimit <= uinfo->llimit) {
printf("%s: tdev, ddp 0x%x >= 0x%x.\n",
__func__, uinfo->llimit, uinfo->ulimit);
return;
}
if (ddp) {
atomic_add_acq_int(&ddp->refcnt, 1);
CTR2(KTR_CXGBE, "tdev, ddp 0x%p already set up, %d.\n",
ddp, atomic_load_acq_int(&ddp->refcnt));
return;
}
ppmax = (uinfo->ulimit - uinfo->llimit + 1) >> IPPOD_SIZE_SHIFT;
if (ppmax <= 1024) {
CTR3(KTR_CXGBE, "tdev, ddp 0x%x ~ 0x%x, nppod %u < 1K.\n",
uinfo->llimit, uinfo->ulimit, ppmax);
return;
}
bits = (fls(ppmax) - 1) + 1;
if (bits > IPPOD_IDX_MAX_SIZE)
bits = IPPOD_IDX_MAX_SIZE;
ppmax = (1 << (bits - 1)) - 1;
ddp = cxgbei_ulp2_alloc_big_mem(sizeof(struct cxgbei_ulp2_ddp_info) +
ppmax * (sizeof(struct cxgbei_ulp2_gather_list *) +
sizeof(unsigned char)));
if (ddp == NULL) {
CTR1(KTR_CXGBE, "unable to alloc ddp 0x%d, ddp disabled.\n",
ppmax);
return;
}
ddp->colors = (unsigned char *)(ddp + 1);
ddp->gl_map = (struct cxgbei_ulp2_gather_list **)(ddp->colors +
ppmax * sizeof(unsigned char));
*ddp_pp = ddp;
mtx_init(&ddp->map_lock, "ddp lock", NULL,
MTX_DEF | MTX_DUPOK| MTX_RECURSE);
atomic_set_acq_int(&ddp->refcnt, 1);
/* dma_tag create */
rc = ulp2_dma_tag_create(ddp);
if (rc) {
printf("%s: unable to alloc ddp 0x%d, ddp disabled.\n",
__func__, ppmax);
return;
}
ddp->tdev = tdev;
ddp->max_txsz = min(uinfo->max_txsz, ULP2_MAX_PKT_SIZE);
ddp->max_rxsz = min(uinfo->max_rxsz, ULP2_MAX_PKT_SIZE);
ddp->llimit = uinfo->llimit;
ddp->ulimit = uinfo->ulimit;
ddp->nppods = ppmax;
ddp->idx_last = ppmax;
ddp->idx_bits = bits;
ddp->idx_mask = (1 << bits) - 1;
ddp->rsvd_tag_mask = (1 << (bits + IPPOD_IDX_SHIFT)) - 1;
CTR2(KTR_CXGBE,
"gl map 0x%p, idx_last %u.\n", ddp->gl_map, ddp->idx_last);
uinfo->tagmask = ddp->idx_mask << IPPOD_IDX_SHIFT;
for (i = 0; i < DDP_PGIDX_MAX; i++)
uinfo->pgsz_factor[i] = ddp_page_order[i];
uinfo->ulimit = uinfo->llimit + (ppmax << IPPOD_SIZE_SHIFT);
printf("nppods %u, bits %u, mask 0x%x,0x%x pkt %u/%u,"
" %u/%u.\n",
ppmax, ddp->idx_bits, ddp->idx_mask,
ddp->rsvd_tag_mask, ddp->max_txsz, uinfo->max_txsz,
ddp->max_rxsz, uinfo->max_rxsz);
rc = bus_dmamap_create(ddp->ulp_ddp_tag, 0, &ddp->ulp_ddp_map);
if (rc != 0) {
printf("%s: bus_dmamap_Create failed\n", __func__);
return;
}
}
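/*
 * Standalone illustration (not part of the driver): the sizing math in
 * ddp_init() for a made-up 16MB pagepod region. Each pagepod is 64 bytes
 * (IPPOD_SIZE_SHIFT) and the low 6 tag bits hold the color, so the pod
 * index sits at bits 6 and up. fls() is FreeBSD's find-last-set.
 */
#include <stdio.h>
#include <strings.h>

#define IPPOD_SIZE_SHIFT	6
#define IPPOD_IDX_SHIFT		6

int
main(void)
{
	unsigned int llimit = 0, ulimit = 16 * 1024 * 1024 - 1;
	unsigned int ppmax, bits, idx_mask, tagmask;

	ppmax = (ulimit - llimit + 1) >> IPPOD_SIZE_SHIFT;	/* 262144 pods */
	bits = fls(ppmax);					/* 19 */
	ppmax = (1 << (bits - 1)) - 1;				/* 262143 */
	idx_mask = (1 << bits) - 1;
	tagmask = idx_mask << IPPOD_IDX_SHIFT;
	printf("nppods %u, idx bits %u, tagmask 0x%x\n", ppmax, bits, tagmask);
	return (0);
}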
/**
* cxgbei_ulp2_ddp_init - initialize ddp functions
*/
void
cxgbei_ulp2_ddp_init(void *tdev,
struct cxgbei_ulp2_ddp_info **ddp_pp,
struct ulp_iscsi_info *uinfo)
{
if (page_idx == DDP_PGIDX_MAX) {
page_idx = cxgbei_ulp2_ddp_find_page_index(PAGE_SIZE);
if (page_idx == DDP_PGIDX_MAX) {
if (cxgbei_ulp2_ddp_adjust_page_table()) {
CTR1(KTR_CXGBE, "PAGE_SIZE %x, ddp disabled.\n",
PAGE_SIZE);
return;
}
}
page_idx = cxgbei_ulp2_ddp_find_page_index(PAGE_SIZE);
}
ddp_init(tdev, ddp_pp, uinfo);
}


@@ -0,0 +1,348 @@
/*-
* Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Chelsio T5xx iSCSI driver
* cxgbei_ulp2_ddp.h: Chelsio iSCSI DDP Manager.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef __CXGBEI_ULP2_DDP_H__
#define __CXGBEI_ULP2_DDP_H__
#include <sys/malloc.h>
#include <sys/sglist.h>
#include <sys/pciio.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/uma.h>
/*
* Structure used to return information to the iscsi layer.
*/
struct ulp_iscsi_info {
unsigned int offset;
unsigned int llimit;
unsigned int ulimit;
unsigned int tagmask;
unsigned char pgsz_factor[4];
unsigned int max_rxsz;
unsigned int max_txsz;
};
/*
* struct cxgbei_ulp2_tag_format - cxgbei ulp tag format for an iscsi entity
*
* @sw_bits: # of bits used by iscsi software layer
* @rsvd_bits: # of bits used by h/w
* @rsvd_shift: h/w bits shift left
* @rsvd_mask: reserved bit mask
*/
typedef struct cxgbei_ulp2_tag_format {
unsigned char sw_bits;
unsigned char rsvd_bits;
unsigned char rsvd_shift;
unsigned char filler[1];
uint32_t rsvd_mask;
} cxgbei_ulp2_tag_format;
#define CXGBEI_PAGE_MASK (~(PAGE_SIZE-1))
#define DDP_THRESHOLD 2048
/*
* cxgbei ddp tag are 32 bits, it consists of reserved bits used by h/w and
* non-reserved bits that can be used by the iscsi s/w.
* The reserved bits are identified by the rsvd_bits and rsvd_shift fields
* in struct cxgbei_ulp2_tag_format.
*
* The uppermost reserved bit can be used to check whether a tag is a ddp tag:
* if the bit is 0, the tag is a valid ddp tag.
*/
/*
* cxgbei_ulp2_is_ddp_tag - check if a given tag is a hw/ddp tag
* @tformat: tag format information
* @tag: tag to be checked
*
* return true if the tag is a ddp tag, false otherwise.
*/
static inline int
cxgbei_ulp2_is_ddp_tag(struct cxgbei_ulp2_tag_format *tformat, uint32_t tag)
{
return !(tag & (1 << (tformat->rsvd_bits + tformat->rsvd_shift - 1)));
}
/*
* cxgbei_ulp2_sw_tag_usable - check if s/w tag has enough bits left for hw bits
* @tformat: tag format information
* @sw_tag: s/w tag to be checked
*
* return true if the tag can be used for hw ddp tag, false otherwise.
*/
static inline int
cxgbei_ulp2_sw_tag_usable(struct cxgbei_ulp2_tag_format *tformat,
    uint32_t sw_tag)
{
	/* XXX: the check below is disabled for now; every s/w tag passes. */
	return 1;
#if 0
	sw_tag >>= (32 - tformat->rsvd_bits + tformat->rsvd_shift);
	return !sw_tag;
#endif
}
/*
* cxgbei_ulp2_set_non_ddp_tag - mark a given s/w tag as an invalid ddp tag
* @tformat: tag format information
* @sw_tag: s/w tag to be checked
*
* insert 1 at the upper most reserved bit to mark it as an invalid ddp tag.
*/
static inline uint32_t
cxgbei_ulp2_set_non_ddp_tag(struct cxgbei_ulp2_tag_format *tformat,
uint32_t sw_tag)
{
uint32_t rsvd_bits = tformat->rsvd_bits + tformat->rsvd_shift;
if (sw_tag) {
u32 v1 = sw_tag & ((1 << (rsvd_bits - 1)) - 1);
u32 v2 = (sw_tag >> (rsvd_bits - 1)) << rsvd_bits;
return v2 | (1 << (rsvd_bits - 1)) | v1;
}
return sw_tag | (1 << (rsvd_bits - 1));
}
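/*
 * Standalone illustration (not part of the driver): round trip through
 * the two helpers above, with a made-up format of 10 reserved bits and a
 * shift of 6, putting the "non-ddp" flag in bit 15. A tag produced by
 * cxgbei_ulp2_set_non_ddp_tag() must fail the cxgbei_ulp2_is_ddp_tag()
 * test.
 */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	unsigned int rsvd_bits = 10 + 6;	/* rsvd_bits + rsvd_shift */
	uint32_t sw_tag = 0x1234, v1, v2, tag;

	/* same bit surgery as cxgbei_ulp2_set_non_ddp_tag() */
	v1 = sw_tag & ((1 << (rsvd_bits - 1)) - 1);
	v2 = (sw_tag >> (rsvd_bits - 1)) << rsvd_bits;
	tag = v2 | (1 << (rsvd_bits - 1)) | v1;	/* 0x9234 */

	/* same test as cxgbei_ulp2_is_ddp_tag(): flag bit set, not ddp */
	assert((tag & (1 << (rsvd_bits - 1))) != 0);
	return (0);
}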
struct dma_segments {
bus_dmamap_t bus_map;
bus_addr_t phys_addr;
};
/*
* struct cxgbei_ulp2_gather_list - cxgbei direct data placement memory
*
* @tag: ddp tag
* @length: total data buffer length
* @offset: initial offset to the 1st page
* @nelem: # of pages
* @pages: page pointers
* @phys_addr: physical address
*/
struct cxgbei_ulp2_gather_list {
uint32_t tag;
uint32_t tid;
uint32_t port_id;
void *egress_dev;
unsigned int length;
unsigned int offset;
unsigned int nelem;
bus_size_t mapsize;
bus_dmamap_t bus_map;
bus_dma_segment_t *segments;
void **pages;
struct dma_segments dma_sg[0];
};
struct cxgbei_ulp2_pagepod_hdr;
/*
* struct cxgbei_ulp2_ddp_info - direct data placement for pdu payload
*
* @list: list head to link elements
* @refcnt: count of iscsi entities using it
* @tdev: pointer to tXcdev used by cxgbX driver
* @max_txsz: max tx packet size for ddp
* @max_rxsz: max rx packet size for ddp
* @llimit: lower bound of the page pod memory
* @ulimit: upper bound of the page pod memory
* @nppods: # of page pod entries
* @idx_last: page pod entry last used
* @idx_bits: # of bits the pagepod index would take
* @idx_mask: pagepod index mask
* @rsvd_tag_mask: tag mask
* @map_lock: lock to synchronize access to the page pod map
* @gl_map: ddp memory gather list
*/
struct cxgbei_ulp2_ddp_info {
SLIST_ENTRY(cxgbei_ulp2_ddp_info) cxgbei_ulp2_ddp_list;
volatile int refcnt;
void *tdev; /* t5odev */
unsigned int max_txsz;
unsigned int max_rxsz;
unsigned int llimit;
unsigned int ulimit;
unsigned int nppods;
unsigned int idx_last;
unsigned char idx_bits;
unsigned char filler[3];
uint32_t idx_mask;
uint32_t rsvd_tag_mask;
bus_addr_t rsvd_page_phys_addr;
int (*ddp_set_map)(struct cxgbei_ulp2_ddp_info *ddp,
void *isock,
struct cxgbei_ulp2_pagepod_hdr *hdr,
unsigned int idx, unsigned int npods,
struct cxgbei_ulp2_gather_list *gl, int reply);
void (*ddp_clear_map)(struct cxgbei_ulp2_ddp_info *ddp,
struct cxgbei_ulp2_gather_list *gl,
unsigned int tag, unsigned int idx,
unsigned int npods,
iscsi_socket *isock);
struct mtx map_lock;
bus_dma_tag_t ulp_ddp_tag;
bus_dmamap_t ulp_ddp_map;
unsigned char *colors;
struct cxgbei_ulp2_gather_list **gl_map;
};
#define IPPOD_SIZE sizeof(struct cxgbei_ulp2_pagepod) /* 64 */
#define IPPOD_SIZE_SHIFT 6
#define IPPOD_COLOR_SHIFT 0
#define IPPOD_COLOR_SIZE 6
#define IPPOD_COLOR_MASK ((1 << IPPOD_COLOR_SIZE) - 1)
#define IPPOD_IDX_SHIFT IPPOD_COLOR_SIZE
#define IPPOD_IDX_MAX_SIZE 24
#define S_IPPOD_TID 0
#define M_IPPOD_TID 0xFFFFFF
#define V_IPPOD_TID(x) ((x) << S_IPPOD_TID)
#define S_IPPOD_VALID 24
#define V_IPPOD_VALID(x) ((x) << S_IPPOD_VALID)
#define F_IPPOD_VALID V_IPPOD_VALID(1U)
#define S_IPPOD_COLOR 0
#define M_IPPOD_COLOR 0x3F
#define V_IPPOD_COLOR(x) ((x) << S_IPPOD_COLOR)
#define S_IPPOD_TAG 6
#define M_IPPOD_TAG 0xFFFFFF
#define V_IPPOD_TAG(x) ((x) << S_IPPOD_TAG)
#define S_IPPOD_PGSZ 30
#define M_IPPOD_PGSZ 0x3
#define V_IPPOD_PGSZ(x) ((x) << S_IPPOD_PGSZ)
static inline uint32_t
cxgbei_ulp2_ddp_tag_base(unsigned int idx, struct cxgbei_ulp2_ddp_info *ddp,
struct cxgbei_ulp2_tag_format *tformat, uint32_t sw_tag)
{
ddp->colors[idx]++;
if (ddp->colors[idx] == (1 << IPPOD_IDX_SHIFT))
ddp->colors[idx] = 0;
sw_tag <<= (tformat->rsvd_bits + tformat->rsvd_shift);
return sw_tag | (idx << 6) | ddp->colors[idx];
}
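/*
 * Standalone illustration (not part of the driver): the tag layout built
 * by cxgbei_ulp2_ddp_tag_base() above is [sw_tag | pod index | color],
 * with the color in the low 6 bits. cxgbei_ulp2_ddp_tag_release()
 * recovers the index with (tag >> IPPOD_IDX_SHIFT) & idx_mask. The
 * 19-bit index width below is an example value.
 */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	unsigned int rsvd_bits = 19, rsvd_shift = 6;	/* example widths */
	uint32_t idx_mask = (1 << rsvd_bits) - 1;
	uint32_t sw_tag = 0x5, idx = 0x1fe, color = 0x21, tag;

	tag = (sw_tag << (rsvd_bits + rsvd_shift)) | (idx << 6) | color;
	assert(((tag >> 6) & idx_mask) == idx);	/* pod index recovered */
	assert((tag & 0x3f) == color);		/* color recovered */
	return (0);
}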
#define ISCSI_PDU_NONPAYLOAD_LEN 312 /* bhs(48) + ahs(256) + digest(8) */
/*
* align pdu size to multiple of 512 for better performance
*/
#define cxgbei_align_pdu_size(n) do { n = (n) & (~511); } while (0)
#define ULP2_MAX_PKT_SIZE 16224
#define ULP2_MAX_PDU_PAYLOAD (ULP2_MAX_PKT_SIZE - ISCSI_PDU_NONPAYLOAD_LEN)
#define IPPOD_PAGES_MAX 4
#define IPPOD_PAGES_SHIFT 2 /* 4 pages per pod */
/*
* struct pagepod_hdr, pagepod - pagepod format
*/
struct cxgbei_ulp2_pagepod_hdr {
uint32_t vld_tid;
uint32_t pgsz_tag_clr;
uint32_t maxoffset;
uint32_t pgoffset;
uint64_t rsvd;
};
struct cxgbei_ulp2_pagepod {
struct cxgbei_ulp2_pagepod_hdr hdr;
uint64_t addr[IPPOD_PAGES_MAX + 1];
};
/*
* ddp page size array
*/
#define DDP_PGIDX_MAX 4
extern unsigned char ddp_page_order[DDP_PGIDX_MAX];
extern unsigned char page_idx;
/*
 * large contiguous memory chunk allocation/release
 */
static inline void *
cxgbei_ulp2_alloc_big_mem(unsigned int size)
{
	return (malloc(size, M_TEMP, M_NOWAIT | M_ZERO));
}
static inline void
cxgbei_ulp2_free_big_mem(void *addr)
{
free(addr, M_TEMP);
}
int cxgbei_ulp2_ddp_tag_reserve(struct cxgbei_ulp2_ddp_info *, void *,
    unsigned int, struct cxgbei_ulp2_tag_format *, uint32_t *,
    struct cxgbei_ulp2_gather_list *, int, int);
void cxgbei_ulp2_ddp_tag_release(struct cxgbei_ulp2_ddp_info *,
    uint32_t, iscsi_socket *);
struct cxgbei_ulp2_gather_list *cxgbei_ulp2_ddp_make_gl(unsigned int,
    struct sglist *, unsigned int, struct pci_conf *, int);
struct cxgbei_ulp2_gather_list *cxgbei_ulp2_ddp_make_gl_from_iscsi_sgvec(
    unsigned int, cxgbei_sgl *, unsigned int, void *, int);
void cxgbei_ulp2_ddp_release_gl(struct cxgbei_ulp2_gather_list *, void *);
int cxgbei_ulp2_ddp_find_page_index(unsigned long);
int cxgbei_ulp2_adapter_ddp_info(struct cxgbei_ulp2_ddp_info *,
struct cxgbei_ulp2_tag_format *,
unsigned int *, unsigned int *);
void cxgbei_ulp2_ddp_cleanup(struct cxgbei_ulp2_ddp_info **);
void cxgbei_ulp2_ddp_init(void *,
struct cxgbei_ulp2_ddp_info **,
struct ulp_iscsi_info *);
int cxgbei_ulp2_init(void);
void cxgbei_ulp2_exit(void);
#endif


@@ -0,0 +1,816 @@
/*-
* Copyright (c) 2012 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Edward Tomasz Napierala under sponsorship
* from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
/*
* cxgbei implementation of iSCSI Common Layer kobj(9) interface.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <vm/uma.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>
#include "cxgbei.h"
SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD, 0, "Chelsio iSCSI offload");
static int coalesce = 1;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, coalesce, CTLFLAG_RWTUN,
&coalesce, 0, "Try to coalesce PDUs before sending");
static int partial_receive_len = 128 * 1024;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
&partial_receive_len, 0, "Minimum read size for partially received "
"data segment");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN,
&sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN,
&recvspace, 0, "Default receive socket buffer size");
static MALLOC_DEFINE(M_ICL_CXGBEI, "icl_cxgbei", "iSCSI offload backend");
static uma_zone_t icl_pdu_zone;
static uma_zone_t icl_transfer_zone;
static volatile u_int icl_ncons;
#define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock)
#define ICL_CONN_UNLOCK(X) mtx_unlock(X->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X) mtx_assert(X->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X) mtx_assert(X->ic_lock, MA_NOTOWNED)
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu;
static icl_conn_pdu_free_t icl_cxgbei_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
icl_cxgbei_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data;
static icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data;
static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue;
static icl_conn_handoff_t icl_cxgbei_conn_handoff;
static icl_conn_free_t icl_cxgbei_conn_free;
static icl_conn_close_t icl_cxgbei_conn_close;
static icl_conn_connected_t icl_cxgbei_conn_connected;
static icl_conn_task_setup_t icl_cxgbei_conn_task_setup;
static icl_conn_task_done_t icl_cxgbei_conn_task_done;
static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup;
static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done;
static kobj_method_t icl_cxgbei_methods[] = {
KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu),
KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free),
KOBJMETHOD(icl_conn_pdu_data_segment_length,
icl_cxgbei_conn_pdu_data_segment_length),
KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data),
KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data),
KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue),
KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff),
KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free),
KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close),
KOBJMETHOD(icl_conn_connected, icl_cxgbei_conn_connected),
KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup),
KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done),
KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup),
KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done),
{ 0, 0 }
};
DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_conn));
struct icl_pdu * icl_pdu_new_empty(struct icl_conn *ic, int flags);
void icl_pdu_free(struct icl_pdu *ip);
struct icl_pdu *
icl_pdu_new_empty(struct icl_conn *ic, int flags)
{
struct icl_pdu *ip;
#ifdef DIAGNOSTIC
refcount_acquire(&ic->ic_outstanding_pdus);
#endif
ip = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
if (ip == NULL) {
ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
#ifdef DIAGNOSTIC
refcount_release(&ic->ic_outstanding_pdus);
#endif
return (NULL);
}
ip->ip_conn = ic;
return (ip);
}
void
icl_pdu_free(struct icl_pdu *ip)
{
struct icl_conn *ic;
ic = ip->ip_conn;
m_freem(ip->ip_bhs_mbuf);
m_freem(ip->ip_ahs_mbuf);
m_freem(ip->ip_data_mbuf);
uma_zfree(icl_pdu_zone, ip);
#ifdef DIAGNOSTIC
refcount_release(&ic->ic_outstanding_pdus);
#endif
}
void
icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
icl_pdu_free(ip);
}
/*
* Allocate icl_pdu with empty BHS to fill up by the caller.
*/
struct icl_pdu *
icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags)
{
struct icl_pdu *ip;
ip = icl_pdu_new_empty(ic, flags);
if (ip == NULL)
return (NULL);
ip->ip_bhs_mbuf = m_getm2(NULL, sizeof(struct iscsi_bhs),
flags, MT_DATA, M_PKTHDR);
if (ip->ip_bhs_mbuf == NULL) {
ICL_WARN("failed to allocate %zd bytes", sizeof(*ip));
icl_pdu_free(ip);
return (NULL);
}
ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);
return (ip);
}
static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
uint32_t len = 0;
len += request->ip_bhs->bhs_data_segment_len[0];
len <<= 8;
len += request->ip_bhs->bhs_data_segment_len[1];
len <<= 8;
len += request->ip_bhs->bhs_data_segment_len[2];
return (len);
}
size_t
icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic,
const struct icl_pdu *request)
{
return (icl_pdu_data_segment_length(request));
}
static void
icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
{
response->ip_bhs->bhs_data_segment_len[2] = len;
response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
}
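/*
 * Standalone illustration (not part of the driver): the DataSegmentLength
 * packing used by the two helpers above. The BHS carries the length as a
 * 24-bit big-endian field, high byte first.
 */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint8_t dsl[3];
	uint32_t len = 0x012345, out;

	dsl[2] = len & 0xff;		/* low byte */
	dsl[1] = (len >> 8) & 0xff;
	dsl[0] = (len >> 16) & 0xff;	/* high byte first */

	out = ((uint32_t)dsl[0] << 16) | (dsl[1] << 8) | dsl[2];
	assert(out == len);
	return (0);
}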
static size_t
icl_pdu_padding(const struct icl_pdu *ip)
{
if ((ip->ip_data_len % 4) != 0)
return (4 - (ip->ip_data_len % 4));
return (0);
}
static size_t
icl_pdu_size(const struct icl_pdu *response)
{
size_t len;
KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
len = sizeof(struct iscsi_bhs) + response->ip_data_len +
icl_pdu_padding(response);
return (len);
}
static uint32_t
icl_conn_build_tasktag(struct icl_conn *ic, uint32_t tag)
{
return tag;
}
static int
icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
{
struct icl_conn *ic;
if (!soreadable(so))
return (SU_OK);
ic = arg;
cv_signal(&ic->ic_receive_cv);
return (SU_OK);
}
static int
icl_pdu_finalize(struct icl_pdu *request)
{
size_t padding, pdu_len;
uint32_t zero = 0;
int ok;
struct icl_conn *ic;
ic = request->ip_conn;
icl_pdu_set_data_segment_length(request, request->ip_data_len);
pdu_len = icl_pdu_size(request);
if (request->ip_data_len != 0) {
padding = icl_pdu_padding(request);
if (padding > 0) {
ok = m_append(request->ip_data_mbuf, padding,
(void *)&zero);
if (ok != 1) {
ICL_WARN("failed to append padding");
return (1);
}
}
m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
request->ip_data_mbuf = NULL;
}
request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;
return (0);
}
static int
icl_soupcall_send(struct socket *so, void *arg, int waitflag)
{
struct icl_conn *ic;
if (!sowriteable(so))
return (SU_OK);
ic = arg;
ICL_CONN_LOCK(ic);
ic->ic_check_send_space = true;
ICL_CONN_UNLOCK(ic);
cv_signal(&ic->ic_send_cv);
return (SU_OK);
}
static int
icl_pdu_append_data(struct icl_pdu *request, const void *addr, size_t len,
int flags)
{
struct mbuf *mb, *newmb;
size_t copylen, off = 0;
KASSERT(len > 0, ("len == 0"));
newmb = m_getm2(NULL, len, flags, MT_DATA, M_PKTHDR);
if (newmb == NULL) {
ICL_WARN("failed to allocate mbuf for %zd bytes", len);
return (ENOMEM);
}
for (mb = newmb; mb != NULL; mb = mb->m_next) {
copylen = min(M_TRAILINGSPACE(mb), len - off);
memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
mb->m_len = copylen;
off += copylen;
}
KASSERT(off == len, ("%s: off != len", __func__));
if (request->ip_data_mbuf == NULL) {
request->ip_data_mbuf = newmb;
request->ip_data_len = len;
} else {
m_cat(request->ip_data_mbuf, newmb);
request->ip_data_len += len;
}
return (0);
}
int
icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
const void *addr, size_t len, int flags)
{
return (icl_pdu_append_data(request, addr, len, flags));
}
static void
icl_pdu_get_data(struct icl_pdu *ip, size_t off, void *addr, size_t len)
{
/* data is DDP'ed, no need to copy */
if (ip->ip_ofld_prv0)
	return;
m_copydata(ip->ip_data_mbuf, off, len, addr);
}
void
icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
size_t off, void *addr, size_t len)
{
return (icl_pdu_get_data(ip, off, addr, len));
}
static void
icl_pdu_queue(struct icl_pdu *ip)
{
struct icl_conn *ic;
ic = ip->ip_conn;
ICL_CONN_LOCK_ASSERT(ic);
if (ic->ic_disconnecting || ic->ic_socket == NULL) {
ICL_DEBUG("icl_pdu_queue on closed connection");
icl_pdu_free(ip);
return;
}
icl_pdu_finalize(ip);
cxgbei_conn_xmit_pdu(ic, ip);
}
void
icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
icl_pdu_queue(ip);
}
static struct icl_conn *
icl_cxgbei_new_conn(const char *name, struct mtx *lock)
{
struct icl_conn *ic;
refcount_acquire(&icl_ncons);
ic = (struct icl_conn *)kobj_create(&icl_cxgbei_class, M_ICL_CXGBEI, M_WAITOK | M_ZERO);
STAILQ_INIT(&ic->ic_to_send);
ic->ic_lock = lock;
cv_init(&ic->ic_send_cv, "icl_tx");
cv_init(&ic->ic_receive_cv, "icl_rx");
#ifdef DIAGNOSTIC
refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
ic->ic_max_data_segment_length = ICL_MAX_DATA_SEGMENT_LENGTH;
ic->ic_name = name;
ic->ic_offload = strdup("cxgbei", M_TEMP);
return (ic);
}
void
icl_cxgbei_conn_free(struct icl_conn *ic)
{
cv_destroy(&ic->ic_send_cv);
cv_destroy(&ic->ic_receive_cv);
kobj_delete((struct kobj *)ic, M_ICL_CXGBEI);
refcount_release(&icl_ncons);
}
static int
icl_conn_start(struct icl_conn *ic)
{
size_t minspace;
struct sockopt opt;
int error, one = 1;
ICL_CONN_LOCK(ic);
/*
* XXX: Ugly hack.
*/
if (ic->ic_socket == NULL) {
ICL_CONN_UNLOCK(ic);
return (EINVAL);
}
ic->ic_receive_state = ICL_CONN_STATE_BHS;
ic->ic_receive_len = sizeof(struct iscsi_bhs);
ic->ic_disconnecting = false;
ICL_CONN_UNLOCK(ic);
/*
* For sendspace, this is required because the current code cannot
* send a PDU in pieces; thus, the minimum buffer size is equal
* to the maximum PDU size. "+4" is to account for possible padding.
*
* What we should actually do here is to use autoscaling, but set
* some minimal buffer size to "minspace". I don't know a way to do
* that, though.
*/
minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
if (sendspace < minspace) {
ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
minspace);
sendspace = minspace;
}
if (recvspace < minspace) {
ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
minspace);
recvspace = minspace;
}
error = soreserve(ic->ic_socket, sendspace, recvspace);
if (error != 0) {
ICL_WARN("soreserve failed with error %d", error);
icl_cxgbei_conn_close(ic);
return (error);
}
ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;
/*
* Disable Nagle.
*/
bzero(&opt, sizeof(opt));
opt.sopt_dir = SOPT_SET;
opt.sopt_level = IPPROTO_TCP;
opt.sopt_name = TCP_NODELAY;
opt.sopt_val = &one;
opt.sopt_valsize = sizeof(one);
error = sosetopt(ic->ic_socket, &opt);
if (error != 0) {
ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
icl_cxgbei_conn_close(ic);
return (error);
}
/*
* Register socket upcall, to get notified about incoming PDUs
* and free space to send outgoing ones.
*/
SOCKBUF_LOCK(&ic->ic_socket->so_snd);
soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, ic);
SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, ic);
SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
return (0);
}
int
icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd)
{
struct file *fp;
struct socket *so;
cap_rights_t rights;
int error;
ICL_CONN_LOCK_ASSERT_NOT(ic);
/*
* Steal the socket from userland.
*/
error = fget(curthread, fd,
cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
if (error != 0)
return (error);
if (fp->f_type != DTYPE_SOCKET) {
fdrop(fp, curthread);
return (EINVAL);
}
so = fp->f_data;
if (so->so_type != SOCK_STREAM) {
fdrop(fp, curthread);
return (EINVAL);
}
ICL_CONN_LOCK(ic);
if (ic->ic_socket != NULL) {
ICL_CONN_UNLOCK(ic);
fdrop(fp, curthread);
return (EBUSY);
}
ic->ic_socket = fp->f_data;
fp->f_ops = &badfileops;
fp->f_data = NULL;
fdrop(fp, curthread);
ICL_CONN_UNLOCK(ic);
error = icl_conn_start(ic);
if (error == 0)
	cxgbei_conn_set_ulp_mode(ic->ic_socket, ic);
return (error);
}
void
icl_cxgbei_conn_close(struct icl_conn *ic)
{
struct icl_pdu *pdu;
ICL_CONN_LOCK_ASSERT_NOT(ic);
ICL_CONN_LOCK(ic);
if (ic->ic_socket == NULL) {
ICL_CONN_UNLOCK(ic);
return;
}
/*
* Deregister socket upcalls.
*/
ICL_CONN_UNLOCK(ic);
SOCKBUF_LOCK(&ic->ic_socket->so_snd);
if (ic->ic_socket->so_snd.sb_upcall != NULL)
soupcall_clear(ic->ic_socket, SO_SND);
SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
if (ic->ic_socket->so_rcv.sb_upcall != NULL)
soupcall_clear(ic->ic_socket, SO_RCV);
SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);
ICL_CONN_LOCK(ic);
ic->ic_disconnecting = true;
/*
* Wake up the threads, so they can properly terminate.
*/
while (ic->ic_receive_running || ic->ic_send_running) {
//ICL_DEBUG("waiting for send/receive threads to terminate");
cv_signal(&ic->ic_receive_cv);
cv_signal(&ic->ic_send_cv);
cv_wait(&ic->ic_send_cv, ic->ic_lock);
}
//ICL_DEBUG("send/receive threads terminated");
ICL_CONN_UNLOCK(ic);
cxgbei_conn_close(ic->ic_socket);
soclose(ic->ic_socket);
ICL_CONN_LOCK(ic);
ic->ic_socket = NULL;
if (ic->ic_receive_pdu != NULL) {
//ICL_DEBUG("freeing partially received PDU");
icl_pdu_free(ic->ic_receive_pdu);
ic->ic_receive_pdu = NULL;
}
/*
* Remove any outstanding PDUs from the send queue.
*/
while (!STAILQ_EMPTY(&ic->ic_to_send)) {
pdu = STAILQ_FIRST(&ic->ic_to_send);
STAILQ_REMOVE_HEAD(&ic->ic_to_send, ip_next);
icl_pdu_free(pdu);
}
KASSERT(STAILQ_EMPTY(&ic->ic_to_send),
("destroying session with non-empty send queue"));
#ifdef DIAGNOSTIC
KASSERT(ic->ic_outstanding_pdus == 0,
("destroying session with %d outstanding PDUs",
ic->ic_outstanding_pdus));
#endif
ICL_CONN_UNLOCK(ic);
}
bool
icl_cxgbei_conn_connected(struct icl_conn *ic)
{
ICL_CONN_LOCK_ASSERT_NOT(ic);
ICL_CONN_LOCK(ic);
if (ic->ic_socket == NULL) {
ICL_CONN_UNLOCK(ic);
return (false);
}
if (ic->ic_socket->so_error != 0) {
ICL_CONN_UNLOCK(ic);
return (false);
}
ICL_CONN_UNLOCK(ic);
return (true);
}
int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio,
uint32_t *task_tagp, void **prvp)
{
void *prv;
*task_tagp = icl_conn_build_tasktag(ic, *task_tagp);
prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO);
if (prv == NULL)
return (ENOMEM);
*prvp = prv;
cxgbei_conn_task_reserve_itt(ic, prvp, csio, task_tagp);
return (0);
}
void
icl_cxgbei_conn_task_done(struct icl_conn *ic, void *prv)
{
cxgbei_cleanup_task(ic, prv);
uma_zfree(icl_transfer_zone, prv);
}
int
icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
uint32_t *transfer_tag, void **prvp)
{
void *prv;
*transfer_tag = icl_conn_build_tasktag(ic, *transfer_tag);
prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO);
if (prv == NULL)
return (ENOMEM);
*prvp = prv;
cxgbei_conn_transfer_reserve_ttt(ic, prvp, io, transfer_tag);
return (0);
}
void
icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *prv)
{
cxgbei_cleanup_task(ic, prv);
uma_zfree(icl_transfer_zone, prv);
}
static int
icl_cxgbei_limits(size_t *limitp)
{
*limitp = 8 * 1024;
return (0);
}
#ifdef ICL_KERNEL_PROXY
int
icl_conn_handoff_sock(struct icl_conn *ic, struct socket *so)
{
int error;
ICL_CONN_LOCK_ASSERT_NOT(ic);
if (so->so_type != SOCK_STREAM)
return (EINVAL);
ICL_CONN_LOCK(ic);
if (ic->ic_socket != NULL) {
ICL_CONN_UNLOCK(ic);
return (EBUSY);
}
ic->ic_socket = so;
ICL_CONN_UNLOCK(ic);
error = icl_conn_start(ic);
return (error);
}
#endif /* ICL_KERNEL_PROXY */
static int
icl_cxgbei_load(void)
{
int error;
icl_pdu_zone = uma_zcreate("icl_pdu",
sizeof(struct icl_pdu), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
icl_transfer_zone = uma_zcreate("icl_transfer",
16 * 1024, NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, 0);
refcount_init(&icl_ncons, 0);
/*
 * Register this backend under the name "cxgbei".
 */
error = icl_register("cxgbei", 100, icl_cxgbei_limits, icl_cxgbei_new_conn);
KASSERT(error == 0, ("failed to register"));
return (error);
}
static int
icl_cxgbei_unload(void)
{
if (icl_ncons != 0)
return (EBUSY);
icl_unregister("cxgbei");
uma_zdestroy(icl_pdu_zone);
uma_zdestroy(icl_transfer_zone);
return (0);
}
static int
icl_cxgbei_modevent(module_t mod, int what, void *arg)
{
switch (what) {
case MOD_LOAD:
return (icl_cxgbei_load());
case MOD_UNLOAD:
return (icl_cxgbei_unload());
default:
return (EINVAL);
}
}
moduledata_t icl_cxgbei_data = {
"icl_cxgbei",
icl_cxgbei_modevent,
0
};
DECLARE_MODULE(icl_cxgbei, icl_cxgbei_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_cxgbei, icl, 1, 1, 1);
MODULE_VERSION(icl_cxgbei, 1);


@@ -0,0 +1,7 @@
t0 {
TargetAddress = 15.1.1.33
TargetName = iqn.2008-09.com.example:server.target1
HeaderDigest = CRC32C
DataDigest = CRC32C
FirstBurstLength = 8192
}


@@ -0,0 +1,121 @@
/**************************************************************************
Copyright (c) 2007-2008, Chelsio Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Neither the name of the Chelsio Corporation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
$FreeBSD$
***************************************************************************/
#ifndef CXGB_MBUFQ_H_
#define CXGB_MBUFQ_H_
struct mbuf_head {
struct mbuf *head;
struct mbuf *tail;
uint32_t qlen;
uint32_t qsize;
struct mtx lock;
};
static __inline void
mbufq_init(struct mbuf_head *l)
{
l->head = l->tail = NULL;
l->qlen = l->qsize = 0;
}
static __inline int
mbufq_empty(struct mbuf_head *l)
{
return (l->head == NULL);
}
static __inline int
mbufq_len(struct mbuf_head *l)
{
return (l->qlen);
}
static __inline int
mbufq_size(struct mbuf_head *l)
{
return (l->qsize);
}
static __inline int
mbufq_head_size(struct mbuf_head *l)
{
return (l->head ? l->head->m_pkthdr.len : 0);
}
static __inline void
mbufq_tail(struct mbuf_head *l, struct mbuf *m)
{
l->qlen++;
if (l->head == NULL)
l->head = m;
else
l->tail->m_nextpkt = m;
l->tail = m;
l->qsize += m->m_pkthdr.len;
}
static __inline struct mbuf *
mbufq_dequeue(struct mbuf_head *l)
{
struct mbuf *m;
m = l->head;
if (m) {
if (m == l->tail)
l->head = l->tail = NULL;
else
l->head = m->m_nextpkt;
m->m_nextpkt = NULL;
l->qlen--;
l->qsize -= m->m_pkthdr.len;
}
return (m);
}
static __inline struct mbuf *
mbufq_peek(const struct mbuf_head *l)
{
return (l->head);
}
static __inline void
mbufq_append(struct mbuf_head *a, struct mbuf_head *b)
{
	if (a->tail)
		a->tail->m_nextpkt = b->head;
	else
		a->head = b->head;	/* a was empty; adopt b's chain */
	if (b->tail)
		a->tail = b->tail;
	a->qlen += b->qlen;
	a->qsize += b->qsize;
}
#endif /* CXGB_MBUFQ_H_ */


@@ -990,7 +990,10 @@ t4_tod_output(struct toedev *tod, struct tcpcb *tp)
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
-	t4_push_frames(sc, toep, 0);
+	if (toep->ulp_mode == ULP_MODE_ISCSI)
+		t4_ulp_push_frames(sc, toep, 0);
+	else
+		t4_push_frames(sc, toep, 0);
 	return (0);
}


@@ -458,14 +458,9 @@ do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 		    __func__, vld, tid, toep);
 	}
 	if (toep->ulp_mode == ULP_MODE_ISCSI) {
-		m = m_get(M_NOWAIT, MT_DATA);
-		if (m == NULL)
-			CXGBE_UNIMPLEMENTED("mbuf alloc failure");
-		memcpy(mtod(m, unsigned char *), cpl,
-		    sizeof(struct cpl_rx_data_ddp));
-		if (!t4_cpl_iscsi_callback(td, toep, m, CPL_RX_DATA_DDP))
+		if (!t4_cpl_iscsi_callback(td, toep, (void *)cpl,
+		    CPL_RX_DATA_DDP))
 			return (0);
-		m_freem(m);
 	}
 	handle_ddp_data(toep, cpl->u.ddp_report, cpl->seq, be16toh(cpl->len));


@@ -80,6 +80,7 @@ struct icl_pdu {
 	uint32_t	ip_prv0;
 	uint32_t	ip_prv1;
 	uint32_t	ip_prv2;
+	uint32_t	ip_ofld_prv0;	/* tells the iSCSI initiator the data was DDP'ed */
 };
#define ICL_CONN_STATE_INVALID 0


@@ -11,9 +11,11 @@ SUBDIR+= t4_firmware
 SUBDIR+= t5_firmware
 SUBDIR+= ${_tom}
 SUBDIR+= ${_iw_cxgbe}
+SUBDIR+= ${_cxgbei}
 .if ${MACHINE_CPUARCH} == "amd64"
 _tom= tom
+_cxgbei= cxgbei
 .if ${MK_OFED} != "no" || defined(ALL_MODULES)
 _iw_cxgbe= iw_cxgbe
 .endif


@@ -0,0 +1,25 @@
# $FreeBSD$
CXGBEI = ${.CURDIR}/../../../dev/cxgbe/cxgbei
CXGBE = ${.CURDIR}/../../../dev/cxgbe
.PATH: ${CXGBEI} ${CXGBE}/common
KMOD= cxgbei
SRCS= cxgbei.c
SRCS+= cxgbei_ulp2_ddp.c
SRCS+= icl_cxgbei.c
SRCS+= bus_if.h
SRCS+= device_if.h
SRCS+= opt_inet.h
SRCS+= opt_inet6.h
SRCS+= opt_ofed.h
SRCS+= pci_if.h
SRCS+= opt_cam.h
SRCS+= icl_conn_if.h
CFLAGS+= -I${CXGBE}
MFILES= kern/bus_if.m kern/device_if.m dev/iscsi/icl_conn_if.m dev/pci/pci_if.m
.include <bsd.kmod.mk>