Poul-Henning Kamp cf827063a9 Give MEXTADD() another argument to make both void pointers to the
free function controllable, instead of passing the KVA of the buffer
storage as the first argument.

Fix all conventional users of the API to pass the KVA of the buffer
as the first argument, to make this a no-op commit.

Likely break the only non-conventional user of the API, after informing
the relevant committer.

Update the mbuf(9) manual page, which was already out of sync on
this point.

Bump __FreeBSD_version to 800016 as there is no way to tell how
many arguments a CPP macro needs any other way.

This paves the way for giving sendfile(9) a way to wait for the
passed storage to have been accessed before returning.

This does not affect the memory layout or size of mbufs.

Parental oversight by:	sam and rwatson.

No MFC is anticipated.
2008-02-01 19:36:27 +00:00

577 lines
15 KiB
C

/*-
* Copyright (c) 2005-2007 Daniel Braniss <danny@cs.huji.ac.il>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
/*
| iSCSI
| $Id: isc_soc.c,v 1.26 2007/05/19 06:09:01 danny Exp danny $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_iscsi_initiator.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/ctype.h>
#include <sys/errno.h>
#include <sys/sysctl.h>
#include <sys/file.h>
#include <sys/uio.h>
#include <sys/socketvar.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/kthread.h>
#include <sys/syslog.h>
#include <sys/mbuf.h>
#include <sys/user.h>
#include <dev/iscsi/initiator/iscsi.h>
#include <dev/iscsi/initiator/iscsivar.h>
#ifndef USE_MBUF
#define USE_MBUF
#endif
#ifdef USE_MBUF
/*
 | No-op "free" callback for mbuf external storage.
 | isc_sendPDU() hands it to MEXTADD(); both arguments are ignored
 | and nothing is released here.
 */
static void
nil_fn(void *a, void *b)
{
    (void)a;
    (void)b;
}

/*
 | Dummy reference counter that isc_sendPDU() installs as the
 | m_ext.ref_cnt of every external-storage mbuf it builds.
 */
static int nil_refcnt;
#endif /* USE_MBUF */
/*
 | Assemble the PDU described by pq and send it on the session socket.
 |
 | With USE_MBUF (forced on near the top of this file) the PDU is built
 | as an mbuf chain:
 |   - one header mbuf holding the BHS, any AHS and the header digest,
 |   - zero or more mbufs that reference the data segment in place
 |     (external storage, released by the no-op nil_fn),
 |   - one trailing mbuf holding the data digest, when enabled.
 | Without USE_MBUF the same pieces are described by pq->iov/pq->uio.
 |
 | sp: session; supplies the socket, digest functions and statistics.
 | pq: queued PDU to transmit.
 |
 | Returns 0 on success (and updates sp->stats), otherwise the error
 | from sosend() (or EAGAIN in the uio path when no progress is made).
 */
int
isc_sendPDU(isc_session_t *sp, pduq_t *pq)
{
    pdu_t *pp = &pq->pdu;
    int len, error;
#ifdef USE_MBUF
    struct mbuf *mh, **mp;
#else
    struct uio *uio = &pq->uio;
    struct iovec *iv;
#endif /* USE_MBUF */

    debug_called(8);

#ifndef USE_MBUF
    bzero(uio, sizeof(struct uio));
    uio->uio_rw = UIO_WRITE;
    uio->uio_segflg = UIO_SYSSPACE;
    uio->uio_td = sp->td;
    uio->uio_iov = iv = pq->iov;
    iv->iov_base = &pp->ipdu;
    iv->iov_len = sizeof(union ipdu_u);
    uio->uio_resid = pq->len;
    iv++;
#else /* USE_MBUF */
    /*
     | mbuf for the iSCSI header
     | NOTE(review): the results of MGETHDR/MGET are not checked anywhere
     | in this function - confirm M_TRYWAIT cannot return NULL here.
     */
    MGETHDR(mh, M_TRYWAIT, MT_DATA);
    mh->m_len = mh->m_pkthdr.len = sizeof(union ipdu_u);
    mh->m_pkthdr.rcvif = NULL;
    MH_ALIGN(mh, sizeof(union ipdu_u));
    bcopy(&pp->ipdu, mh->m_data, sizeof(union ipdu_u));
    mh->m_next = NULL;
#endif /* USE_MBUF */

    if(sp->hdrDigest)
        pq->pdu.hdr_dig = sp->hdrDigest(&pp->ipdu, sizeof(union ipdu_u), 0);

    if(pp->ahs_len) {
#ifndef USE_MBUF
        iv->iov_base = pp->ahs;
        iv->iov_len = pp->ahs_len;
        iv++;
#else /* USE_MBUF */
        /* Add any AHS to the iSCSI hdr mbuf */
        /* XXX Assert: (mh->m_pkthdr.len + pp->ahs_len) < MHLEN */
        bcopy(pp->ahs, (mh->m_data + mh->m_len), pp->ahs_len);
        mh->m_len += pp->ahs_len;
        mh->m_pkthdr.len += pp->ahs_len;
#endif /* USE_MBUF */
        /*
         | Digest the AHS bytes themselves: pp->ahs is what is put on the
         | wire above, so that is what must be fed to the digest (not the
         | address of the pp->ahs member).
         */
        if(sp->hdrDigest)
            pq->pdu.hdr_dig = sp->hdrDigest(pp->ahs, pp->ahs_len, pq->pdu.hdr_dig);
    }

    if(sp->hdrDigest) {
        debug(2, "hdr_dig=%x", pq->pdu.hdr_dig);
#ifndef USE_MBUF
        iv->iov_base = &pp->hdr_dig;
        iv->iov_len = sizeof(int);
        iv++;
#else /* USE_MBUF */
        /* Add header digest to the iSCSI hdr mbuf */
        /* XXX Assert: (mh->m_pkthdr.len + 4) < MHLEN */
        bcopy(&pp->hdr_dig, (mh->m_data + mh->m_len), sizeof(int));
        mh->m_len += sizeof(int);
        mh->m_pkthdr.len += sizeof(int);
#endif /* USE_MBUF */
    }

#ifdef USE_MBUF
    /* further mbufs are linked in through mp */
    mp = &mh->m_next;
#endif /* USE_MBUF */

    if(pq->pdu.ds) {
#ifndef USE_MBUF
        iv->iov_base = pp->ds;
        iv->iov_len = pp->ds_len;
        while(iv->iov_len & 03) // the specs say it must be int aligned
            iv->iov_len++;
        iv++;
#else /* USE_MBUF */
        struct mbuf *md;
        int off = 0;

        len = pp->ds_len;
        while(len & 03) // the specs say it must be int aligned
            len++;
        /*
         | Reference the data segment in place, at most MCLBYTES per mbuf.
         | nil_fn/nil_refcnt make the eventual "free" of the storage a no-op.
         */
        while (len > 0) {
            int l;

            MGET(md, M_TRYWAIT, MT_DATA);
            md->m_ext.ref_cnt = &nil_refcnt;
            l = min(MCLBYTES, len);
            MEXTADD(md, pp->ds + off, l, nil_fn,
                pp->ds + off, NULL, 0, EXT_EXTREF);
            md->m_len = l;
            md->m_next = NULL;
            mh->m_pkthdr.len += l;
            *mp = md;
            mp = &md->m_next;
            len -= l;
            off += l;
        }
#endif /* USE_MBUF */
    }

    if(sp->dataDigest) {
#ifdef USE_MBUF
        struct mbuf *me;
#endif /* USE_MBUF */

        pp->ds_dig = sp->dataDigest(pp->ds, pp->ds_len, 0);
#ifndef USE_MBUF
        iv->iov_base = &pp->ds_dig;
        iv->iov_len = sizeof(int);
        iv++;
#else /* USE_MBUF */
        MGET(me, M_TRYWAIT, MT_DATA);
        me->m_len = sizeof(int);
        /*
         | Align the digest mbuf itself. Re-aligning mh here would move
         | mh->m_data past the header that was already copied in.
         */
        M_ALIGN(me, sizeof(int));
        bcopy(&pp->ds_dig, me->m_data, sizeof(int));
        me->m_next = NULL;
        mh->m_pkthdr.len += sizeof(int);
        *mp = me;
#endif /* USE_MBUF */
    }

#ifndef USE_MBUF
    uio->uio_iovcnt = iv - pq->iov;
    sdebug(5, "opcode=%x iovcnt=%d uio_resid=%d itt=%x",
        pp->ipdu.bhs.opcode, uio->uio_iovcnt, uio->uio_resid,
        ntohl(pp->ipdu.bhs.itt));
    sdebug(5, "sp=%p sp->soc=%p uio=%p sp->td=%p",
        sp, sp->soc, uio, sp->td);
    do {
        len = uio->uio_resid;
        error = sosend(sp->soc, NULL, uio, 0, 0, 0, sp->td);
        if(uio->uio_resid == 0 || error || len == uio->uio_resid) {
            if(uio->uio_resid) {
                sdebug(2, "uio->uio_resid=%d uio->uio_iovcnt=%d error=%d len=%d",
                    uio->uio_resid, uio->uio_iovcnt, error, len);
                if(error == 0)
                    error = EAGAIN; // 35
            }
            break;
        }
        /*
         | XXX: untested code
         | partial send: skip the iovecs (or the part of one) already sent
         */
        sdebug(1, "uio->uio_resid=%d uio->uio_iovcnt=%d",
            uio->uio_resid, uio->uio_iovcnt);
        iv = uio->uio_iov;
        len -= uio->uio_resid;
        while(uio->uio_iovcnt > 0) {
            if(iv->iov_len > len) {
                caddr_t bp = (caddr_t)iv->iov_base;

                iv->iov_len -= len;
                iv->iov_base = (void *)&bp[len];
                break;
            }
            len -= iv->iov_len;
            uio->uio_iovcnt--;
            uio->uio_iov++;
            iv++;
        }
    } while(uio->uio_resid);

    if(error == 0) {
        sp->stats.nsent++;
        getbintime(&sp->stats.t_sent);
    }
    return error;
#else /* USE_MBUF */
    error = sosend(sp->soc, NULL, NULL, mh, 0, 0, sp->td);
    if (error != 0) {
        /*
         | sosend() frees the data chain on return, also on failure,
         | so it must not be freed again here.
         */
        return (error);
    }
    sp->stats.nsent++;
    getbintime(&sp->stats.t_sent);
    return 0;
#endif /* USE_MBUF */
}
/*
| wait till a PDU header is received
| from the socket.
*/
/*
The format of the BHS is:
Byte/ 0 | 1 | 2 | 3 |
/ | | | |
|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|
+---------------+---------------+---------------+---------------+
0|.|I| Opcode |F| Opcode-specific fields |
+---------------+---------------+---------------+---------------+
4|TotalAHSLength | DataSegmentLength |
+---------------+---------------+---------------+---------------+
8| LUN or Opcode-specific fields |
+ +
12| |
+---------------+---------------+---------------+---------------+
16| Initiator Task Tag |
+---------------+---------------+---------------+---------------+
20/ Opcode-specific fields /
+/ /
+---------------+---------------+---------------+---------------+
48
*/
/*
 | Read exactly one BHS (sizeof(bhs_t) bytes) from the session socket
 | into sp->bhs, using the session's own uio/iovec and MSG_WAITALL.
 |
 | Returns 0 when the whole header was read, the soreceive() error
 | otherwise, or EAGAIN when soreceive() reported success but left
 | part of the header unread.
 */
static __inline int
so_getbhs(isc_session_t *sp)
{
    struct iovec *vec = &sp->iov;
    struct uio *io = &sp->uio;
    int rcvflags = MSG_WAITALL;
    int error;

    debug_called(8);

    /* a single iovec covering the header buffer */
    vec->iov_base = &sp->bhs;
    vec->iov_len = sizeof(bhs_t);

    io->uio_iov = vec;
    io->uio_iovcnt = 1;
    io->uio_rw = UIO_READ;
    io->uio_segflg = UIO_SYSSPACE;
    io->uio_td = curthread; // why ...
    io->uio_resid = sizeof(bhs_t);

    error = soreceive(sp->soc, NULL, io, NULL, NULL, &rcvflags);
    if(error) {
        debug(2, "error=%d so_error=%d uio->uio_resid=%d iov.iov_len=%zd",
            error,
            sp->soc->so_error, io->uio_resid, vec->iov_len);
    }
    else if(io->uio_resid > 0) {
        /* no error reported, but the header is incomplete */
        debug(2, "error=%d so_error=%d uio->uio_resid=%d iov.iov_len=%zd so_state=%x",
            error,
            sp->soc->so_error, io->uio_resid, vec->iov_len, sp->soc->so_state);
        error = EAGAIN; // EPIPE;
    }
    return error;
}
/*
 | so_recv gets called when there is at least
 | an iSCSI header in the queue.
 |
 | Using the BHS already stored in pq->pdu.ipdu.bhs, work out how many
 | more bytes belong to this PDU (AHS, header digest, padded data
 | segment, data digest), read them from the socket as an mbuf chain
 | into pq->mp, and update the session's command window from the
 | MaxCmdSN/ExpStSN fields of the header.
 |
 | Returns 0 on success. On any failure pq is handed to pdu_free() and
 | an error is returned: E2BIG for an oversized PDU, EIO for a bad
 | command window, the soreceive() error, or EPIPE when soreceive()
 | reported no error but left data unread.
 */
static int
so_recv(isc_session_t *sp, pduq_t *pq)
{
    struct socket *so = sp->soc;
    sn_t *sn = &sp->sn;
    struct uio *uio = &pq->uio;
    pdu_t *pp;
    int error;
    size_t n, len;
    bhs_t *bhs;
    u_int max, exp;

    debug_called(8);
    /*
     | now calculate how much data should be in the buffer
     | NOTE: digest is not verified/calculated - yet
     */
    pp = &pq->pdu;
    bhs = &pp->ipdu.bhs;

    len = 0;
    if(bhs->AHSLength) {
        /* AHSLength is multiplied by 4 to get a byte count */
        pp->ahs_len = bhs->AHSLength * 4;
        len += pp->ahs_len;
    }
    if(sp->hdrDigest)
        len += 4;
    if(bhs->DSLength) {
        n = bhs->DSLength;
#if BYTE_ORDER == LITTLE_ENDIAN
        /* swap the low three bytes of the length field */
        pp->ds_len = ((n & 0x00ff0000) >> 16)
            | (n & 0x0000ff00)
            | ((n & 0x000000ff) << 16);
#else
        pp->ds_len = n;
#endif
        len += pp->ds_len;
        /* round up to a multiple of 4 */
        while(len & 03)
            len++;
        if(sp->dataDigest)
            len += 4;
    }

    if((sp->opt.maxRecvDataSegmentLength > 0) && (len > sp->opt.maxRecvDataSegmentLength)) {
#if 0
        xdebug("impossible PDU length(%d) opt.maxRecvDataSegmentLength=%d",
            len, sp->opt.maxRecvDataSegmentLength);
        // deep trouble here, probably all we can do is
        // force a disconnect, XXX: check RFC ...
        log(LOG_ERR,
            "so_recv: impossible PDU length(%ld) from iSCSI %s/%s\n",
            len, sp->opt.targetAddress, sp->opt.targetName);
#endif
        /*
         | XXX: this will really screw up the stream.
         | should clear up the buffer till a valid header
         | is found, or just close connection ...
         | should read the RFC.
         */
        error = E2BIG;
        goto out;
    }

    if(len) {
        int flags;

        uio->uio_resid = len;
        uio->uio_td = curthread; // why ...
        flags = MSG_WAITALL;

        error = soreceive(so, NULL, uio, &pq->mp, NULL, &flags);
        //if(error == EAGAIN)
        // XXX: this needs work! it hangs iscontrol
        if(error || uio->uio_resid)
            goto out;
    }
    pq->len += len;
    sdebug(6, "len=%d] opcode=0x%x ahs_len=0x%x ds_len=0x%x",
        pq->len, bhs->opcode, pp->ahs_len, pp->ds_len);

    max = ntohl(bhs->MaxCmdSN);
    exp = ntohl(bhs->ExpStSN);

    if(max < exp - 1 &&
       max > exp - _MAXINCR) {
        sdebug(2, "bad cmd window size");
        error = EIO; // XXX: for now;
        goto out; // error
    }

    /* only ever move the window forward (serial number arithmetic) */
    if(SNA_GT(max, sn->maxCmd))
        sn->maxCmd = max;

    if(SNA_GT(exp, sn->expCmd))
        sn->expCmd = exp;

    sp->cws = sn->maxCmd - sn->expCmd + 1;

    return 0;

 out:
    // XXX: need some work here
    xdebug("have a problem, error=%d", error);
    /*
     | uio lives inside pq, so the short-read check has to be made
     | before pq is handed to pdu_free().
     */
    if(!error && uio->uio_resid > 0)
        error = EPIPE;
    pdu_free(sp->isc, pq);
    return error;
}
/*
 | wait for something to arrive.
 | and if the pdu is without errors, process it.
 |
 | Reads the BHS with so_getbhs(), allocates a pduq_t, copies the
 | header into it and lets so_recv() fetch the remainder. A complete
 | PDU is counted in sp->stats and passed to ism_recv().
 |
 | Returns 0 on success, the so_getbhs() error unchanged, or the
 | so_recv() error with 0x800 added.
 */
static int
so_input(isc_session_t *sp)
{
    pduq_t *pq;
    int error;

    debug_called(8);

    /*
     | first read in the iSCSI header
     */
    error = so_getbhs(sp);
    if(error != 0)
        return error;

    /*
     | now read the rest.
     */
    pq = pdu_alloc(sp->isc, 1); // OK to WAIT
    pq->pdu.ipdu.bhs = sp->bhs;
    pq->len = sizeof(bhs_t); // so far only the header was read

    error = so_recv(sp, pq);
    if(error != 0) {
        // XXX: just to see the error.
        // terminal error
        // XXX: close connection and exit
        return error + 0x800;
    }

    sp->stats.nrecv++;
    getbintime(&sp->stats.t_recv);
    ism_recv(sp, pq);

    return error;
}
/*
| one per active (connected) session.
| this thread is responsible for reading
| in packets from the target.
*/
static void
isc_soc(void *vp)
{
isc_session_t *sp = (isc_session_t *)vp;
struct socket *so = sp->soc;
int error;
debug_called(8);
sp->flags |= ISC_CON_RUNNING;
if(sp->cam_path)
ic_release(sp);
error = 0;
while(sp->flags & ISC_CON_RUN) {
// XXX: hunting ...
if(sp->soc == NULL || !(so->so_state & SS_ISCONNECTED)) {
debug(2, "sp->soc=%p", sp->soc);
break;
}
error = so_input(sp);
if(error == 0) {
#ifdef ISC_OWAITING
mtx_lock(&sp->io_mtx);
if(sp->flags & ISC_OWAITING) {
sp->flags &= ~ISC_OWAITING;
}
wakeup(&sp->flags);
mtx_unlock(&sp->io_mtx);
#else
wakeup(&sp->flags);
#endif
} else if(error == EPIPE)
break;
else if(error == EAGAIN) {
if(so->so_state & SS_ISCONNECTED)
// there seems to be a problem in 6.0 ...
tsleep(sp, PRIBIO, "isc_soc", 2*hz);
}
}
sdebug(2, "terminated, flags=%x so_count=%d so_state=%x error=%d",
sp->flags, so->so_count, so->so_state, error);
if((sp->proc != NULL) && sp->signal) {
PROC_LOCK(sp->proc);
psignal(sp->proc, sp->signal);
PROC_UNLOCK(sp->proc);
sp->flags |= ISC_SIGNALED;
sdebug(2, "pid=%d signaled(%d)", sp->proc->p_pid, sp->signal);
}
else {
// we have to do something ourselves
// like closing this session ...
}
/*
| we've been terminated
*/
// do we need this mutex ...?
mtx_lock(&sp->io_mtx);
sp->flags &= ~ISC_CON_RUNNING;
wakeup(&sp->soc);
mtx_unlock(&sp->io_mtx);
kproc_exit(0);
}
/*
 | Stop the receiver of a session.
 |
 | Shuts down the read side of the socket, clears ISC_CON_RUN and then
 | waits, for at most 5 sleeps of 5*hz each on &sp->soc, until
 | ISC_CON_RUNNING is no longer set. Finally the file reference (if
 | any) and the socket are released and both pointers are reset.
 */
void
isc_stop_receiver(isc_session_t *sp)
{
    int tries = 5;

    debug_called(8);
    sdebug(4, "sp=%p sp->soc=%p", sp, sp? sp->soc: 0);

    soshutdown(sp->soc, SHUT_RD);

    mtx_lock(&sp->io_mtx);
    sp->flags &= ~ISC_CON_RUN;
    /* the countdown is consumed before the flag is looked at */
    while(tries-- != 0 && (sp->flags & ISC_CON_RUNNING) != 0) {
        sdebug(3, "waiting n=%d... flags=%x", tries, sp->flags);
        msleep(&sp->soc, &sp->io_mtx, PRIBIO, "isc_stpc", 5*hz);
    }
    mtx_unlock(&sp->io_mtx);

    if(sp->fp != NULL) {
        fdrop(sp->fp, sp->td);
    }
    fputsock(sp->soc);

    sp->fp = NULL;
    sp->soc = NULL;
}
/*
 | Start the receiver of a session: set ISC_CON_RUN and create the
 | kernel process "iscsi<sid>" that runs isc_soc() for this session.
 | The result of kproc_create() is not examined.
 */
void
isc_start_receiver(isc_session_t *sp)
{
    debug_called(8);

    sp->flags |= ISC_CON_RUN;

    kproc_create(isc_soc, sp, &sp->soc_proc,
        0, 0,
        "iscsi%d", sp->sid);
}