Add a simple API to manage scatter/gather lists of physical addresses.

Each list describes a logical memory object that is backed by one or more
physical address ranges.  To minimize locking, the sglist objects
themselves are immutable once they are shared.

These objects may be used in the future to facilitate I/O requests using
physically-addressed buffers.  For the immediate future I plan to use them
to implement a new type of VM object and pager.

Reviewed by:	jeff, scottl
MFC after:	1 month
This commit is contained in:
jhb 2009-06-01 20:35:39 +00:00
parent 9e9bf16671
commit e45af7ed87
5 changed files with 1283 additions and 0 deletions

View File

@ -218,6 +218,7 @@ MAN= accept_filter.9 \
selrecord.9 \
sema.9 \
sf_buf.9 \
sglist.9 \
signal.9 \
sleep.9 \
sleepqueue.9 \
@ -1022,6 +1023,24 @@ MLINKS+=sf_buf.9 sf_buf_alloc.9 \
sf_buf.9 sf_buf_free.9 \
sf_buf.9 sf_buf_kva.9 \
sf_buf.9 sf_buf_page.9
MLINKS+=sglist.9 sglist_alloc.9 \
sglist.9 sglist_append.9 \
sglist.9 sglist_append_mbuf.9 \
sglist.9 sglist_append_phys.9 \
sglist.9 sglist_append_uio.9 \
sglist.9 sglist_append_user.9 \
sglist.9 sglist_build.9 \
sglist.9 sglist_clone.9 \
sglist.9 sglist_consume_uio.9 \
sglist.9 sglist_count.9 \
sglist.9 sglist_free.9 \
sglist.9 sglist_hold.9 \
sglist.9 sglist_init.9 \
sglist.9 sglist_join.9 \
sglist.9 sglist_length.9 \
sglist.9 sglist_reset.9 \
sglist.9 sglist_slice.9 \
sglist.9 sglist_split.9
MLINKS+=signal.9 cursig.9 \
signal.9 execsigs.9 \
signal.9 issignal.9 \

503
share/man/man9/sglist.9 Normal file
View File

@ -0,0 +1,503 @@
.\"
.\" Copyright (c) 2009 Advanced Computing Technologies LLC
.\" Written by: John H. Baldwin <jhb@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd May 15, 2009
.Dt SGLIST 9
.Os
.Sh NAME
.Nm sglist ,
.Nm sglist_alloc ,
.Nm sglist_append ,
.Nm sglist_append_mbuf ,
.Nm sglist_append_phys ,
.Nm sglist_append_uio ,
.Nm sglist_append_user ,
.Nm sglist_build ,
.Nm sglist_clone ,
.Nm sglist_consume_uio ,
.Nm sglist_count ,
.Nm sglist_free ,
.Nm sglist_hold ,
.Nm sglist_init ,
.Nm sglist_join ,
.Nm sglist_length ,
.Nm sglist_reset ,
.Nm sglist_slice ,
.Nm sglist_split
.Nd manage a scatter/gather list of physical memory addresses
.Sh SYNOPSIS
.In sys/types.h
.In sys/sglist.h
.Ft struct sglist *
.Fn sglist_alloc "int nsegs" "int mflags"
.Ft int
.Fn sglist_append "struct sglist *sg" "void *buf" "size_t len"
.Ft int
.Fn sglist_append_mbuf "struct sglist *sg" "struct mbuf *m"
.Ft int
.Fn sglist_append_phys "struct sglist *sg" "vm_paddr_t paddr" "size_t len"
.Ft int
.Fn sglist_append_uio "struct sglist *sg" "struct uio *uio"
.Ft int
.Fn sglist_append_user "struct sglist *sg" "void *buf" "size_t len" "struct thread *td"
.Ft struct sglist *
.Fn sglist_build "void *buf" "size_t len" "int mflags"
.Ft struct sglist *
.Fn sglist_clone "struct sglist *sg" "int mflags"
.Ft int
.Fn sglist_consume_uio "struct sglist *sg" "struct uio *uio" "int resid"
.Ft int
.Fn sglist_count "void *buf" "size_t len"
.Ft void
.Fn sglist_free "struct sglist *sg"
.Ft struct sglist *
.Fn sglist_hold "struct sglist *sg"
.Ft void
.Fn sglist_init "struct sglist *sg" "u_short maxsegs" "struct sglist_seg *segs"
.Ft int
.Fn sglist_join "struct sglist *first" "struct sglist *second"
.Ft size_t
.Fn sglist_length "struct sglist *sg"
.Ft void
.Fn sglist_reset "struct sglist *sg"
.Ft int
.Fn sglist_slice "struct sglist *original" "struct sglist **slice" "size_t offset" "size_t length" "int mflags"
.Ft int
.Fn sglist_split "struct sglist *original" "struct sglist **head" "size_t length" "int mflags"
.Sh DESCRIPTION
The
.Nm
API manages physical address ranges.
Each list contains one or more elements.
Each element contains a starting physical address and a length.
Scatter/gather lists are read-only while they are shared.
If one wishes to alter an existing scatter/gather list and does not hold the
sole reference to the list,
then one should create a new list instead of modifying the existing list.
.Pp
Each scatter/gather list object contains a reference count.
New lists are created with a single reference.
New references are obtained by calling
.Nm sglist_hold
and are released by calling
.Nm sglist_free .
.Ss Allocating and Initializing Lists
Each
.Nm
object consists of a header structure and a variable-length array of
scatter/gather list elements.
The
.Nm sglist_alloc
function allocates a new list that contains a header and
.Fa nsegs
scatter/gather list elements.
The
.Fa mflags
argument can be set to either
.Dv M_NOWAIT
or
.Dv M_WAITOK .
.Pp
The
.Nm sglist_count
function returns the number of scatter/gather list elements needed to describe
the physical address ranges mapped by a single kernel virtual address range.
The kernel virtual address range starts at
.Fa buf
and is
.Fa len
bytes long.
.Pp
The
.Nm sglist_build
function allocates a new scatter/gather list object that describes the physical
address ranges mapped by a single kernel virtual address range.
The kernel virtual address range starts at
.Fa buf
and is
.Fa len
bytes long.
The
.Fa mflags
argument can be set to either
.Dv M_NOWAIT
or
.Dv M_WAITOK .
.Pp
The
.Nm sglist_clone
function returns a copy of an existing scatter/gather list object
.Fa sg .
The
.Fa mflags
argument can be set to either
.Dv M_NOWAIT
or
.Dv M_WAITOK .
This can be used to obtain a private copy of a scatter/gather list before
modifying it.
.Pp
The
.Nm sglist_init
function initializes a scatter/gather list header.
The header is pointed to by
.Fa sg
and is initialized to manage an array of
.Fa maxsegs
scatter/gather list elements pointed to by
.Fa segs .
This can be used to initialize a scatter/gather list header whose storage
is not provided by
.Nm sglist_alloc .
In that case, the caller should not call
.Nm sglist_free
to release its own reference and is responsible for ensuring all other
references to the list are dropped before it releases the storage for
.Fa sg
and
.Fa segs .
.Ss Constructing Scatter/Gather Lists
The
.Nm
API provides several routines for building a scatter/gather list to describe
one or more objects.
Specifically, the
.Nm sglist_append
family of routines can be used to append the physical address ranges described
by an object to the end of a scatter/gather list.
All of these routines return 0 on success or an error on failure.
.Pp
The
.Nm sglist_append
function appends the physical address ranges described by a single kernel
virtual address range to the scatter/gather list
.Fa sg .
The kernel virtual address range starts at
.Fa buf
and is
.Fa len
bytes long.
.Pp
The
.Nm sglist_append_mbuf
function appends the physical address ranges described by an entire mbuf
chain
.Fa m
to the scatter/gather list
.Fa sg .
.Pp
The
.Nm sglist_append_phys
function appends a single physical address range to the scatter/gather list
.Fa sg .
The physical address range starts at
.Fa paddr
and is
.Fa len
bytes long.
.Pp
The
.Nm sglist_append_uio
function appends the physical address ranges described by a
.Xr uio 9
object to the scatter/gather list
.Fa sg .
Note that it is the caller's responsibility to ensure that the pages backing
the I/O request are wired for the lifetime of
.Fa sg .
Note also that this routine does not modify
.Fa uio .
.Pp
The
.Nm sglist_append_user
function appends the physical address ranges described by a single user
virtual address range to the scatter/gather list
.Fa sg .
The user virtual address range is relative to the address space of the thread
.Fa td .
It starts at
.Fa buf
and is
.Fa len
bytes long.
Note that it is the caller's responsibility to ensure that the pages backing
the user buffer are wired for the lifetime of
.Fa sg .
.Pp
The
.Nm sglist_consume_uio
function is a variation of
.Nm sglist_append_uio .
As with
.Nm sglist_append_uio ,
it appends the physical address ranges described by
.Fa uio
to the scatter/gather list
.Fa sg .
Unlike
.Nm sglist_append_uio ,
however,
.Nm sglist_consume_uio
modifies the I/O request to indicate that the appended address ranges have
been processed similar to calling
.Xr uiomove 9 .
This routine will only append ranges that describe up to
.Fa resid
total bytes in length.
If the available segments in the scatter/gather list are exhausted before
.Fa resid
bytes are processed,
then the
.Fa uio
structure will be updated to reflect the actual number of bytes processed,
and
.Nm sglist_consume_uio
will return zero to indicate success.
In effect, this function will perform partial reads or writes.
The caller can compare the
.Fa uio_resid
member of
.Fa uio
before and after calling
.Nm sglist_consume_uio
to determine the actual number of bytes processed.
.Ss Manipulating Scatter/Gather Lists
The
.Nm sglist_join
function appends physical address ranges from the scatter/gather list
.Fa second
onto
.Fa first
and then resets
.Fa second
to an empty list.
It returns zero on success or an error on failure.
.Pp
The
.Nm sglist_split
function splits an existing scatter/gather list into two lists.
The first
.Fa length
bytes described by the list
.Fa original
are moved to a new list
.Fa *head .
If
.Fa original
describes a total address range that is smaller than
.Fa length
bytes,
then all of the address ranges will be moved to the new list at
.Fa *head
and
.Fa original
will be an empty list.
The caller may supply an existing scatter/gather list in
.Fa *head .
If so, the list must be empty.
Otherwise, the caller may set
.Fa *head
to
.Dv NULL
in which case a new scatter/gather list will be allocated.
In that case,
.Fa mflags
may be set to either
.Dv M_NOWAIT
or
.Dv M_WAITOK .
Note that since the
.Fa original
list is modified by this call, it must be a private list with no other
references.
The
.Nm sglist_split
function returns zero on success or an error on failure.
.Pp
The
.Nm sglist_slice
function generates a new scatter/gather list from a sub-range of an existing
scatter/gather list
.Fa original .
The sub-range to extract is specified by the
.Fa offset
and
.Fa length
parameters.
The new scatter/gather list is stored in
.Fa *slice .
As with
.Fa head
for
.Nm sglist_split ,
the caller may either provide an empty scatter/gather list,
or it may set
.Fa *slice
to
.Dv NULL
in which case
.Nm sglist_slice
will allocate a new list subject to
.Fa mflags .
Unlike
.Nm sglist_split ,
.Nm sglist_slice
does not modify
.Fa original
and does not require it to be a private list.
The
.Nm sglist_slice
function returns zero on success or an error on failure.
.Ss Miscellaneous Routines
The
.Nm sglist_reset
function clears the scatter/gather list
.Fa sg
so that it no longer maps any address ranges.
This can allow reuse of a single scatter/gather list object for multiple
requests.
.Pp
The
.Nm sglist_length
function returns the total length of the physical address ranges described
by the scatter/gather list
.Fa sg .
.Sh RETURN VALUES
The
.Nm sglist_alloc ,
.Nm sglist_build ,
and
.Nm sglist_clone
functions return a new scatter/gather list on success or
.Dv NULL
on failure.
.Pp
The
.Nm sglist_append
family of functions and the
.Nm sglist_consume_uio ,
.Nm sglist_join ,
.Nm sglist_slice ,
and
.Nm sglist_split
functions return zero on success or an error on failure.
.Pp
The
.Nm sglist_count
function returns a count of scatter/gather list elements.
.Pp
The
.Nm sglist_length
function returns a count of address space described by a scatter/gather list
in bytes.
.Sh ERRORS
The
.Nm sglist_append
functions return the following errors on failure:
.Bl -tag -width Er
.It Bq Er EINVAL
The scatter/gather list has zero segments.
.It Bq Er EFBIG
There are not enough available segments in the scatter/gather list to append
the specified physical address ranges.
.El
.Pp
The
.Nm sglist_consume_uio
function returns the following error on failure:
.Bl -tag -width Er
.It Bq Er EINVAL
The scatter/gather list has zero segments.
.El
.Pp
The
.Nm sglist_join
function returns the following error on failure:
.Bl -tag -width Er
.It Bq Er EFBIG
There are not enough available segments in the scatter/gather list
.Fa first
to append the physical address ranges from
.Fa second .
.El
.Pp
The
.Nm sglist_slice
function returns the following errors on failure:
.Bl -tag -width Er
.It Bq Er EINVAL
The
.Fa original
scatter/gather list does not describe enough address space to cover the
requested sub-range.
.It Bq Er EINVAL
The caller-supplied scatter/gather list in
.Fa *slice
is not empty.
.It Bq Er ENOMEM
An attempt to allocate a new scatter/gather list with
.Dv M_NOWAIT
set in
.Fa mflags
failed.
.It Bq Er EFBIG
There are not enough available segments in the caller-supplied scatter/gather
list in
.Fa *slice
to describe the requested physical address ranges.
.El
.Pp
The
.Nm sglist_split
function returns the following errors on failure:
.Bl -tag -width Er
.It Bq Er EDOOFUS
The
.Fa original
scatter/gather list has more than one reference.
.It Bq Er EINVAL
The caller-supplied scatter/gather list in
.Fa *head
is not empty.
.It Bq Er ENOMEM
An attempt to allocate a new scatter/gather list with
.Dv M_NOWAIT
set in
.Fa mflags
failed.
.It Bq Er EFBIG
There are not enough available segments in the caller-supplied scatter/gather
list in
.Fa *head
to describe the requested physical address ranges.
.El
.Sh SEE ALSO
.Xr malloc 9 ,
.Xr mbuf 9 ,
.Xr uio 9
.Sh HISTORY
This API was first introduced in
.Fx 8.0 .

View File

@ -1994,6 +1994,7 @@ kern/subr_rman.c standard
kern/subr_rtc.c standard
kern/subr_sbuf.c standard
kern/subr_scanf.c standard
kern/subr_sglist.c standard
kern/subr_sleepqueue.c standard
kern/subr_smp.c standard
kern/subr_stack.c optional ddb | stack | ktr

656
sys/kern/subr_sglist.c Normal file
View File

@ -0,0 +1,656 @@
/*-
* Copyright (c) 2008 Yahoo!, Inc.
* All rights reserved.
* Written by: John Baldwin <jhb@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of any co-contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sglist.h>
#include <sys/uio.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/ktr.h>
/* malloc(9) type used for the lists allocated by sglist_alloc(). */
static MALLOC_DEFINE(M_SGLIST, "sglist", "scatter/gather lists");
/*
 * Append a single (paddr, len) range to a sglist.  'sg' is the list and
 * '*ssp' points at the last segment currently in use.  If the new range
 * starts exactly where that segment ends, the segment is simply extended.
 * Otherwise a new segment is started and '*ssp' is advanced to it.
 *
 * Returns 0 on success, or EFBIG if a new segment is required but the
 * list is already full.  On EFBIG the list is left exactly as it was so
 * that ranges appended earlier stay valid; sglist_consume_uio() relies on
 * this to report partial progress.
 */
static __inline int
_sglist_append_range(struct sglist *sg, struct sglist_seg **ssp,
    vm_paddr_t paddr, size_t len)
{
	struct sglist_seg *ss;

	ss = *ssp;
	if (ss->ss_paddr + ss->ss_len == paddr) {
		/* Physically contiguous with the current segment. */
		ss->ss_len += len;
		return (0);
	}

	/* A new segment is needed; fail without touching the list. */
	if (sg->sg_nseg == sg->sg_maxseg)
		return (EFBIG);

	ss++;
	ss->ss_paddr = paddr;
	ss->ss_len = len;
	sg->sg_nseg++;
	*ssp = ss;
	return (0);
}
/*
 * Worker routine to append a virtual address range (either kernel or
 * user) to a scatter/gather list.
 *
 * 'buf' and 'len' describe the virtual range.  If 'pmap' is non-NULL the
 * addresses are translated with pmap_extract() against that pmap,
 * otherwise pmap_kextract() is used.  The range is walked one page at a
 * time, and each page-sized piece is handed to _sglist_append_range().
 *
 * If 'donep' is non-NULL, '*donep' is set to the number of bytes that
 * were successfully appended, including the final piece.  On failure it
 * counts only the pieces appended before the one that failed.
 *
 * Returns 0 on success or the error from _sglist_append_range().  The
 * empty-list path stores into sg_segs[0] without checking capacity, so
 * callers must reject lists with sg_maxseg == 0 beforehand.
 */
static __inline int
_sglist_append_buf(struct sglist *sg, void *buf, size_t len, pmap_t pmap,
    size_t *donep)
{
	struct sglist_seg *ss;
	vm_offset_t vaddr, offset;
	vm_paddr_t paddr;
	size_t seglen;
	int error;

	if (donep)
		*donep = 0;
	if (len == 0)
		return (0);

	/* Do the first page.  It may have an offset. */
	vaddr = (vm_offset_t)buf;
	offset = vaddr & PAGE_MASK;
	if (pmap != NULL)
		paddr = pmap_extract(pmap, vaddr);
	else
		paddr = pmap_kextract(vaddr);
	seglen = MIN(len, PAGE_SIZE - offset);
	if (sg->sg_nseg == 0) {
		/* Empty list: the first piece always starts segment 0. */
		ss = sg->sg_segs;
		ss->ss_paddr = paddr;
		ss->ss_len = seglen;
		sg->sg_nseg = 1;
		error = 0;
	} else {
		ss = &sg->sg_segs[sg->sg_nseg - 1];
		error = _sglist_append_range(sg, &ss, paddr, seglen);
	}

	/* Remaining pieces are page aligned and at most one page long. */
	while (error == 0 && len > seglen) {
		vaddr += seglen;
		len -= seglen;
		if (donep)
			*donep += seglen;
		seglen = MIN(len, PAGE_SIZE);
		if (pmap != NULL)
			paddr = pmap_extract(pmap, vaddr);
		else
			paddr = pmap_kextract(vaddr);
		error = _sglist_append_range(sg, &ss, paddr, seglen);
	}

	/*
	 * The loop only accounts for a piece once the next one is started,
	 * so the last successfully appended piece has to be counted here.
	 */
	if (error == 0 && donep)
		*donep += seglen;
	return (error);
}
/*
 * Compute how many scatter/gather list elements are required to describe
 * the kernel virtual address range [buf, buf + len).  Consecutive pages
 * whose physical addresses are adjacent share a single element.  A zero
 * length range needs no elements.
 */
int
sglist_count(void *buf, size_t len)
{
	vm_offset_t va, end;
	vm_paddr_t prev, cur;
	int count;

	if (len == 0)
		return (0);

	end = (vm_offset_t)buf + len;
	va = trunc_page((vm_offset_t)buf);

	/* The first page always opens a segment. */
	prev = pmap_kextract(va);
	count = 1;
	for (va += PAGE_SIZE; va < end; va += PAGE_SIZE) {
		cur = pmap_kextract(va);
		/* A physical discontinuity starts another segment. */
		if (prev + PAGE_SIZE != cur)
			count++;
		prev = cur;
	}
	return (count);
}
/*
 * Allocate a scatter/gather list header together with room for 'nsegs'
 * segments in a single malloc(9) allocation; 'mflags' is passed through
 * to malloc(9).  The segment array lives directly after the header.
 * Returns NULL if the allocation fails.  The caller should release the
 * list with sglist_free().
 */
struct sglist *
sglist_alloc(int nsegs, int mflags)
{
	struct sglist *list;

	list = malloc(sizeof(struct sglist) +
	    nsegs * sizeof(struct sglist_seg), M_SGLIST, mflags);
	if (list != NULL)
		sglist_init(list, nsegs, (struct sglist_seg *)(list + 1));
	return (list);
}
/*
* Free a scatter/gather list allocated via sglist_allc().
*/
void
sglist_free(struct sglist *sg)
{
if (refcount_release(&sg->sg_refs))
free(sg, M_SGLIST);
}
/*
 * Append the physical ranges backing one kernel virtual address range
 * ('buf', 'len') to the list 'sg'.  Returns EINVAL for a list with no
 * segment storage, otherwise the result of the append worker (EFBIG
 * when the list runs out of segments).
 */
int
sglist_append(struct sglist *sg, void *buf, size_t len)
{
	int error;

	if (sg->sg_maxseg != 0)
		error = _sglist_append_buf(sg, buf, len, NULL, NULL);
	else
		error = EINVAL;
	return (error);
}
/*
 * Append one physical address range ('paddr', 'len') to the list 'sg'.
 * Returns EINVAL for a list with no segment storage and EFBIG if a new
 * segment is required but none is available.  A zero-length range is
 * accepted and ignored.
 */
int
sglist_append_phys(struct sglist *sg, vm_paddr_t paddr, size_t len)
{
	struct sglist_seg *last;

	if (sg->sg_maxseg == 0)
		return (EINVAL);
	if (len == 0)
		return (0);

	if (sg->sg_nseg != 0) {
		/* Extend the last segment or start a new one after it. */
		last = &sg->sg_segs[sg->sg_nseg - 1];
		return (_sglist_append_range(sg, &last, paddr, len));
	}

	/* Empty list: the range becomes the first segment. */
	sg->sg_segs[0].ss_paddr = paddr;
	sg->sg_segs[0].ss_len = len;
	sg->sg_nseg = 1;
	return (0);
}
/*
 * Append the data areas of every mbuf in the chain starting at 'm0' to
 * the list 'sg'.  Mbufs that carry no data are skipped.  Returns EINVAL
 * for a list with no segment storage, or the first error reported by
 * sglist_append() (EFBIG when the list runs out of segments).
 */
int
sglist_append_mbuf(struct sglist *sg, struct mbuf *m0)
{
	struct mbuf *m;
	int error;

	if (sg->sg_maxseg == 0)
		return (EINVAL);

	for (m = m0; m != NULL; m = m->m_next) {
		if (m->m_len <= 0)
			continue;
		error = sglist_append(sg, m->m_data, m->m_len);
		if (error != 0)
			return (error);
	}
	return (0);
}
/*
 * Append the physical ranges backing one user virtual address range
 * ('buf', 'len') to the list 'sg'.  Addresses are translated through the
 * pmap of the process that owns thread 'td'.  Returns EINVAL for a list
 * with no segment storage, otherwise the result of the append worker
 * (EFBIG when the list runs out of segments).
 */
int
sglist_append_user(struct sglist *sg, void *buf, size_t len, struct thread *td)
{
	pmap_t upmap;

	if (sg->sg_maxseg == 0)
		return (EINVAL);

	upmap = vmspace_pmap(td->td_proc->p_vmspace);
	return (_sglist_append_buf(sg, buf, len, upmap, NULL));
}
/*
* Append the segments that describe a single uio to a scatter/gather
* list. If there are insufficient segments, then this fails with
* EFBIG.
*/
int
sglist_append_uio(struct sglist *sg, struct uio *uio)
{
struct iovec *iov;
size_t resid, minlen;
pmap_t pmap;
int error, i;
if (sg->sg_maxseg == 0)
return (EINVAL);
resid = uio->uio_resid;
iov = uio->uio_iov;
if (uio->uio_segflg == UIO_USERSPACE) {
KASSERT(uio->uio_td != NULL,
("sglist_append_uio: USERSPACE but no thread"));
pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace);
} else
pmap = NULL;
error = 0;
for (i = 0; i < uio->uio_iovcnt && resid != 0; i++) {
/*
* Now at the first iovec to load. Load each iovec
* until we have exhausted the residual count.
*/
minlen = MIN(resid, iov[i].iov_len);
if (minlen > 0) {
error = _sglist_append_buf(sg, iov[i].iov_base, minlen,
pmap, NULL);
if (error)
return (error);
resid -= minlen;
}
}
return (0);
}
/*
* Append the segments that describe at most 'resid' bytes from a
* single uio to a scatter/gather list. If there are insufficient
* segments, then only the amount that fits is appended.
*/
int
sglist_consume_uio(struct sglist *sg, struct uio *uio, int resid)
{
struct iovec *iov;
size_t done;
pmap_t pmap;
int error, len;
if (sg->sg_maxseg == 0)
return (EINVAL);
if (uio->uio_segflg == UIO_USERSPACE) {
KASSERT(uio->uio_td != NULL,
("sglist_consume_uio: USERSPACE but no thread"));
pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace);
} else
pmap = NULL;
error = 0;
while (resid > 0 && uio->uio_resid) {
iov = uio->uio_iov;
len = iov->iov_len;
if (len == 0) {
uio->uio_iov++;
uio->uio_iovcnt--;
continue;
}
if (len > resid)
len = resid;
/*
* Try to append this iovec. If we run out of room,
* then break out of the loop.
*/
error = _sglist_append_buf(sg, iov->iov_base, len, pmap, &done);
iov->iov_base = (char *)iov->iov_base + done;
iov->iov_len -= done;
uio->uio_resid -= done;
uio->uio_offset += done;
resid -= done;
if (error)
break;
}
return (0);
}
/*
 * Allocate a scatter/gather list sized by sglist_count() and fill it
 * with the physical ranges backing the kernel virtual address range
 * ('buf', 'len').  'mflags' is passed to sglist_alloc().  Returns NULL
 * for a zero-length range, when the allocation fails, or when the
 * append fails (in which case the new list is freed again).
 */
struct sglist *
sglist_build(void *buf, size_t len, int mflags)
{
	struct sglist *list;

	if (len == 0)
		return (NULL);

	list = sglist_alloc(sglist_count(buf, len), mflags);
	if (list != NULL && sglist_append(list, buf, len) != 0) {
		sglist_free(list);
		list = NULL;
	}
	return (list);
}
/*
 * Clone a new copy of a scatter/gather list.  The copy has the same
 * capacity (sg_maxseg) and the same segments in use as 'sg', and starts
 * out with its own single reference.  'mflags' is passed to
 * sglist_alloc().  Returns NULL if 'sg' is NULL or the allocation fails.
 */
struct sglist *
sglist_clone(struct sglist *sg, int mflags)
{
	struct sglist *new;

	if (sg == NULL)
		return (NULL);
	new = sglist_alloc(sg->sg_maxseg, mflags);
	if (new == NULL)
		return (NULL);

	/*
	 * sglist_alloc() hands back an empty list, so the in-use count has
	 * to be carried over along with the segment contents.
	 */
	new->sg_nseg = sg->sg_nseg;
	bcopy(sg->sg_segs, new->sg_segs, sizeof(struct sglist_seg) *
	    sg->sg_nseg);
	return (new);
}
/*
* Calculate the total length of the segments described in a
* scatter/gather list.
*/
size_t
sglist_length(struct sglist *sg)
{
size_t space;
int i;
space = 0;
for (i = 0; i < sg->sg_nseg; i++)
space += sg->sg_segs[i].ss_len;
return (space);
}
/*
 * Split a scatter/gather list into two lists.  The scatter/gather
 * entries for the first 'length' bytes of the 'original' list are
 * stored in the '*head' list and are removed from 'original'.  If
 * 'original' describes fewer than 'length' bytes, everything is moved
 * and 'original' ends up empty.
 *
 * If '*head' is NULL, then a new list will be allocated using
 * 'mflags'.  If M_NOWAIT is specified and the allocation fails,
 * ENOMEM will be returned.
 *
 * If '*head' is not NULL, it should point to an empty sglist.  If it
 * does not have enough room for the remaining space, then EFBIG will
 * be returned.  If '*head' is not empty, then EINVAL will be
 * returned.
 *
 * If 'original' is shared (refcount > 1), then EDOOFUS will be
 * returned.
 *
 * If there is nothing to move ('length' is zero or 'original' is
 * empty), 0 is returned and both lists, including '*head', are left
 * untouched.
 */
int
sglist_split(struct sglist *original, struct sglist **head, size_t length,
    int mflags)
{
	struct sglist *sg;
	size_t space, split;
	int count, i;

	if (original->sg_refs > 1)
		return (EDOOFUS);

	/* Moving zero bytes must not create a zero-length segment. */
	if (length == 0)
		return (0);

	/* Figure out how big of a sglist '*head' has to hold. */
	count = 0;
	space = 0;
	split = 0;
	for (i = 0; i < original->sg_nseg; i++) {
		space += original->sg_segs[i].ss_len;
		count++;
		if (space >= length) {
			/*
			 * If 'length' falls in the middle of a
			 * scatter/gather list entry, then 'split'
			 * holds how much of that entry will remain in
			 * 'original'.
			 */
			split = space - length;
			break;
		}
	}

	/* Nothing to do, so leave head empty. */
	if (count == 0)
		return (0);

	if (*head == NULL) {
		sg = sglist_alloc(count, mflags);
		if (sg == NULL)
			return (ENOMEM);
		*head = sg;
	} else {
		sg = *head;
		if (sg->sg_maxseg < count)
			return (EFBIG);
		if (sg->sg_nseg != 0)
			return (EINVAL);
	}

	/* Copy 'count' entries to 'sg' from 'original'. */
	bcopy(original->sg_segs, sg->sg_segs, count *
	    sizeof(struct sglist_seg));
	sg->sg_nseg = count;

	/*
	 * If we had to split a list entry, fixup the last entry in
	 * 'sg' and the new first entry in 'original'.  We also
	 * decrement 'count' by 1 since we will only be removing
	 * 'count - 1' segments from 'original' now.
	 *
	 * The part kept by 'original' is the trailing 'split' bytes of
	 * the entry, so it starts right after the bytes given to 'sg',
	 * i.e. at the entry's address plus the shortened length.
	 */
	if (split != 0) {
		count--;
		sg->sg_segs[count].ss_len -= split;
		original->sg_segs[count].ss_paddr =
		    sg->sg_segs[count].ss_paddr + sg->sg_segs[count].ss_len;
		original->sg_segs[count].ss_len = split;
	}

	/*
	 * Trim 'count' entries from the front of 'original' by sliding
	 * every remaining entry down.  The regions may overlap, which
	 * bcopy() handles.
	 */
	original->sg_nseg -= count;
	bcopy(original->sg_segs + count, original->sg_segs,
	    original->sg_nseg * sizeof(struct sglist_seg));
	return (0);
}
/*
* Append the scatter/gather list elements in 'second' to the
* scatter/gather list 'first'. If there is not enough space in
* 'first', EFBIG is returned.
*/
int
sglist_join(struct sglist *first, struct sglist *second)
{
struct sglist_seg *flast, *sfirst;
int append;
/* If 'second' is empty, there is nothing to do. */
if (second->sg_nseg == 0)
return (0);
/*
* If the first entry in 'second' can be appended to the last entry
* in 'first' then set append to '1'.
*/
append = 0;
flast = &first->sg_segs[first->sg_nseg - 1];
sfirst = &second->sg_segs[0];
if (first->sg_nseg != 0 &&
flast->ss_paddr + flast->ss_len == sfirst->ss_paddr)
append = 1;
/* Make sure 'first' has enough room. */
if (first->sg_nseg + second->sg_nseg - append > first->sg_maxseg)
return (EFBIG);
/* Merge last in 'first' and first in 'second' if needed. */
if (append)
flast->ss_len += sfirst->ss_len;
/* Append new segments from 'second' to 'first'. */
bcopy(first->sg_segs + first->sg_nseg, second->sg_segs + append,
(second->sg_nseg - append) * sizeof(struct sglist_seg));
first->sg_nseg += second->sg_nseg - append;
sglist_reset(second);
return (0);
}
/*
 * Build a scatter/gather list covering the byte range
 * [offset, offset + length) of 'original' and store it in '*slice'.
 * 'original' itself is not modified.  A 'length' of zero is a no-op
 * that returns 0.  If 'original' does not describe enough bytes to
 * cover the requested range, EINVAL is returned.
 *
 * When '*slice' is NULL a list of exactly the required size is
 * allocated with 'mflags'; ENOMEM is returned if that allocation
 * fails.
 *
 * When '*slice' is not NULL it must be an empty list.  EFBIG is
 * returned if it has too few segments for the result and EINVAL if it
 * is not empty.
 */
int
sglist_slice(struct sglist *original, struct sglist **slice, size_t offset,
    size_t length, int mflags)
{
	struct sglist *sg;
	size_t covered, seglen, stop, head_skip, tail_skip;
	int first, i, nsegs;

	/* Nothing to do. */
	if (length == 0)
		return (0);

	/*
	 * Walk 'original' to find the first segment that reaches past
	 * 'offset' and the segment that contains the end of the range.
	 * 'head_skip' is how far into the first segment the range starts
	 * and 'tail_skip' is the unused space at the end of the last one.
	 */
	stop = offset + length;
	covered = 0;
	nsegs = 0;
	first = 0;
	head_skip = 0;
	tail_skip = 0;
	for (i = 0; i < original->sg_nseg; i++) {
		seglen = original->sg_segs[i].ss_len;
		covered += seglen;

		/* Segments that lie entirely before 'offset' are skipped. */
		if (covered <= offset)
			continue;

		if (nsegs == 0) {
			first = i;
			head_skip = offset - (covered - seglen);
			CTR1(KTR_DEV, "sglist_slice: foffs = %08lx",
			    head_skip);
		}
		nsegs++;

		if (covered >= stop) {
			tail_skip = covered - stop;
			CTR1(KTR_DEV, "sglist_slice: loffs = %08lx",
			    tail_skip);
			break;
		}
	}

	/* The requested range runs past the end of 'original'. */
	if (covered < stop)
		return (EINVAL);

	sg = *slice;
	if (sg != NULL) {
		if (sg->sg_maxseg < nsegs)
			return (EFBIG);
		if (sg->sg_nseg != 0)
			return (EINVAL);
	} else {
		sg = sglist_alloc(nsegs, mflags);
		if (sg == NULL)
			return (ENOMEM);
		*slice = sg;
	}

	/* Copy the whole segments, then trim the two ends. */
	bcopy(original->sg_segs + first, sg->sg_segs,
	    nsegs * sizeof(struct sglist_seg));
	sg->sg_nseg = nsegs;

	if (head_skip != 0) {
		sg->sg_segs[0].ss_paddr += head_skip;
		sg->sg_segs[0].ss_len -= head_skip;
		CTR2(KTR_DEV, "sglist_slice seg[0]: %08lx:%08lx",
		    (long)sg->sg_segs[0].ss_paddr, sg->sg_segs[0].ss_len);
	}
	if (tail_skip != 0) {
		sg->sg_segs[nsegs - 1].ss_len -= tail_skip;
		CTR2(KTR_DEV, "sglist_slice seg[%d]: len %08x", nsegs - 1,
		    sg->sg_segs[nsegs - 1].ss_len);
	}
	return (0);
}

104
sys/sys/sglist.h Normal file
View File

@ -0,0 +1,104 @@
/*-
* Copyright (c) 2008 Yahoo!, Inc.
* All rights reserved.
* Written by: John Baldwin <jhb@FreeBSD.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the author nor the names of any co-contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* A scatter/gather list describes a group of physical address ranges.
* Each physical address range consists of a starting address and a
* length.
*/
#ifndef __SGLIST_H__
#define __SGLIST_H__
#include <sys/refcount.h>
/* One physical address range in a scatter/gather list. */
struct sglist_seg {
/* Starting physical address of the range. */
vm_paddr_t ss_paddr;
/* Length of the range in bytes. */
size_t ss_len;
};
/* Scatter/gather list header; the segment array is stored separately. */
struct sglist {
/* Array of sg_maxseg segments, of which the first sg_nseg are in use. */
struct sglist_seg *sg_segs;
/* Reference count, managed through the refcount API. */
int sg_refs;
/* Number of segments currently in use. */
u_short sg_nseg;
/* Capacity of the sg_segs array. */
u_short sg_maxseg;
};
/*
 * Forward declarations for types that the prototypes below take only by
 * pointer.  'struct thread' is included so that the sglist_append_user()
 * prototype does not declare a parameter-scope struct when this header
 * is included before the header that defines it.
 */
struct mbuf;
struct thread;
struct uio;
/*
 * Initialize the list header 'sg' as an empty list that holds a single
 * reference and manages the caller-provided array 'segs' of 'maxsegs'
 * segments.
 */
static __inline void
sglist_init(struct sglist *sg, u_short maxsegs, struct sglist_seg *segs)
{

	refcount_init(&sg->sg_refs, 1);
	sg->sg_maxseg = maxsegs;
	sg->sg_nseg = 0;
	sg->sg_segs = segs;
}
/*
 * Empty the list 'sg' by setting its in-use segment count to zero.  The
 * segment storage and capacity are left alone, so the list can be filled
 * again.
 */
static __inline void
sglist_reset(struct sglist *sg)
{
sg->sg_nseg = 0;
}
/*
 * Acquire an additional reference on the list 'sg' and return 'sg' so
 * the call can be used in an expression.
 */
static __inline struct sglist *
sglist_hold(struct sglist *sg)
{
refcount_acquire(&sg->sg_refs);
return (sg);
}
/*
 * Out-of-line routines.  The functions returning 'int' return 0 on
 * success or an errno value on failure, except for sglist_count(),
 * which returns a segment count.
 */
struct sglist *sglist_alloc(int nsegs, int mflags);
int sglist_append(struct sglist *sg, void *buf, size_t len);
int sglist_append_mbuf(struct sglist *sg, struct mbuf *m0);
int sglist_append_phys(struct sglist *sg, vm_paddr_t paddr,
size_t len);
int sglist_append_uio(struct sglist *sg, struct uio *uio);
int sglist_append_user(struct sglist *sg, void *buf, size_t len,
struct thread *td);
struct sglist *sglist_build(void *buf, size_t len, int mflags);
struct sglist *sglist_clone(struct sglist *sg, int mflags);
int sglist_consume_uio(struct sglist *sg, struct uio *uio, int resid);
int sglist_count(void *buf, size_t len);
void sglist_free(struct sglist *sg);
int sglist_join(struct sglist *first, struct sglist *second);
size_t sglist_length(struct sglist *sg);
int sglist_slice(struct sglist *original, struct sglist **slice,
size_t offset, size_t length, int mflags);
int sglist_split(struct sglist *original, struct sglist **head,
size_t length, int mflags);
#endif /* !__SGLIST_H__ */