2005-01-05 22:34:37 +00:00
|
|
|
/*-
|
1998-09-15 06:33:23 +00:00
|
|
|
* CAM request queue management functions.
|
|
|
|
*
|
2017-11-27 15:12:43 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
|
|
*
|
1998-09-15 06:33:23 +00:00
|
|
|
* Copyright (c) 1997 Justin T. Gibbs.
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions, and the following disclaimer,
|
|
|
|
* without modification, immediately at the beginning of the file.
|
|
|
|
* 2. The name of the author may not be used to endorse or promote products
|
|
|
|
* derived from this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
|
|
|
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
2003-06-10 17:50:20 +00:00
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
1998-09-15 06:33:23 +00:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/malloc.h>
|
2005-07-01 15:21:30 +00:00
|
|
|
#include <sys/kernel.h>
|
1998-09-15 06:33:23 +00:00
|
|
|
|
|
|
|
#include <cam/cam.h>
|
|
|
|
#include <cam/cam_ccb.h>
|
|
|
|
#include <cam/cam_queue.h>
|
|
|
|
#include <cam/cam_debug.h>
|
|
|
|
|
2011-11-07 06:44:47 +00:00
|
|
|
static MALLOC_DEFINE(M_CAMQ, "CAM queue", "CAM queue buffers");
|
|
|
|
static MALLOC_DEFINE(M_CAMDEVQ, "CAM dev queue", "CAM dev queue buffers");
|
|
|
|
static MALLOC_DEFINE(M_CAMCCBQ, "CAM ccb queue", "CAM ccb queue buffers");
|
2005-07-01 15:21:30 +00:00
|
|
|
|
1998-09-15 06:33:23 +00:00
|
|
|
static __inline int
|
|
|
|
queue_cmp(cam_pinfo **queue_array, int i, int j);
|
|
|
|
static __inline void
|
|
|
|
swap(cam_pinfo **queue_array, int i, int j);
|
|
|
|
static void heap_up(cam_pinfo **queue_array, int new_index);
|
|
|
|
static void heap_down(cam_pinfo **queue_array, int index,
|
|
|
|
int last_index);
|
|
|
|
|
|
|
|
int
|
|
|
|
camq_init(struct camq *camq, int size)
|
|
|
|
{
|
|
|
|
bzero(camq, sizeof(*camq));
|
|
|
|
camq->array_size = size;
|
|
|
|
if (camq->array_size != 0) {
|
|
|
|
camq->queue_array = (cam_pinfo**)malloc(size*sizeof(cam_pinfo*),
|
2005-07-01 15:21:30 +00:00
|
|
|
M_CAMQ, M_NOWAIT);
|
1998-09-15 06:33:23 +00:00
|
|
|
if (camq->queue_array == NULL) {
|
|
|
|
printf("camq_init: - cannot malloc array!\n");
|
|
|
|
return (1);
|
|
|
|
}
|
1999-04-19 21:26:08 +00:00
|
|
|
/*
|
|
|
|
* Heap algorithms like everything numbered from 1, so
|
|
|
|
* offset our pointer into the heap array by one element.
|
|
|
|
*/
|
|
|
|
camq->queue_array--;
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free a camq structure. This should only be called if a controller
|
|
|
|
* driver fails somehow during its attach routine or is unloaded and has
|
|
|
|
* obtained a camq structure. The XPT should ensure that the queue
|
|
|
|
* is empty before calling this routine.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
camq_fini(struct camq *queue)
|
|
|
|
{
|
|
|
|
if (queue->queue_array != NULL) {
|
1999-04-19 21:26:08 +00:00
|
|
|
/*
|
|
|
|
* Heap algorithms like everything numbered from 1, so
|
|
|
|
* our pointer into the heap array is offset by one element.
|
|
|
|
*/
|
|
|
|
queue->queue_array++;
|
2005-07-01 15:21:30 +00:00
|
|
|
free(queue->queue_array, M_CAMQ);
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
u_int32_t
|
|
|
|
camq_resize(struct camq *queue, int new_size)
|
|
|
|
{
|
|
|
|
cam_pinfo **new_array;
|
|
|
|
|
2012-09-28 12:13:34 +00:00
|
|
|
KASSERT(new_size >= queue->entries, ("camq_resize: "
|
2016-04-29 21:04:06 +00:00
|
|
|
"New queue size can't accommodate queued entries (%d < %d).",
|
2012-09-28 12:13:34 +00:00
|
|
|
new_size, queue->entries));
|
2018-01-21 15:42:36 +00:00
|
|
|
new_array = (cam_pinfo **)malloc(new_size * sizeof(cam_pinfo *),
|
2005-07-01 15:21:30 +00:00
|
|
|
M_CAMQ, M_NOWAIT);
|
1998-09-15 06:33:23 +00:00
|
|
|
if (new_array == NULL) {
|
|
|
|
/* Couldn't satisfy request */
|
|
|
|
return (CAM_RESRC_UNAVAIL);
|
|
|
|
}
|
1999-04-19 21:26:08 +00:00
|
|
|
/*
|
|
|
|
* Heap algorithms like everything numbered from 1, so
|
|
|
|
* remember that our pointer into the heap array is offset
|
|
|
|
* by one element.
|
|
|
|
*/
|
1998-09-15 06:33:23 +00:00
|
|
|
if (queue->queue_array != NULL) {
|
1999-04-19 21:26:08 +00:00
|
|
|
queue->queue_array++;
|
1998-09-15 06:33:23 +00:00
|
|
|
bcopy(queue->queue_array, new_array,
|
|
|
|
queue->entries * sizeof(cam_pinfo *));
|
2005-07-01 15:21:30 +00:00
|
|
|
free(queue->queue_array, M_CAMQ);
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
1999-04-19 21:26:08 +00:00
|
|
|
queue->queue_array = new_array-1;
|
1998-09-15 06:33:23 +00:00
|
|
|
queue->array_size = new_size;
|
|
|
|
return (CAM_REQ_CMP);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* camq_insert: Given an array of cam_pinfo* elements with
|
1999-04-19 21:26:08 +00:00
|
|
|
* the Heap(1, num_elements) property and array_size - num_elements >= 1,
|
|
|
|
* output Heap(1, num_elements+1) including new_entry in the array.
|
1998-09-15 06:33:23 +00:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
camq_insert(struct camq *queue, cam_pinfo *new_entry)
|
|
|
|
{
|
2012-09-28 12:13:34 +00:00
|
|
|
|
|
|
|
KASSERT(queue->entries < queue->array_size,
|
|
|
|
("camq_insert: Attempt to insert into a full queue (%d >= %d)",
|
|
|
|
queue->entries, queue->array_size));
|
1999-04-19 21:26:08 +00:00
|
|
|
queue->entries++;
|
1998-09-15 06:33:23 +00:00
|
|
|
queue->queue_array[queue->entries] = new_entry;
|
|
|
|
new_entry->index = queue->entries;
|
|
|
|
if (queue->entries != 0)
|
|
|
|
heap_up(queue->queue_array, queue->entries);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* camq_remove: Given an array of cam_pinfo* elements with the
|
1999-04-19 21:26:08 +00:00
|
|
|
* Heap(1, num_elements) property and an index such that 1 <= index <=
|
|
|
|
* num_elements, remove that entry and restore the Heap(1, num_elements-1)
|
1998-09-15 06:33:23 +00:00
|
|
|
* property.
|
|
|
|
*/
|
|
|
|
cam_pinfo *
|
|
|
|
camq_remove(struct camq *queue, int index)
|
|
|
|
{
|
|
|
|
cam_pinfo *removed_entry;
|
|
|
|
|
2016-10-05 17:18:24 +00:00
|
|
|
if (index <= 0 || index > queue->entries)
|
|
|
|
panic("%s: Attempt to remove out-of-bounds index %d "
|
|
|
|
"from queue %p of size %d", __func__, index, queue,
|
|
|
|
queue->entries);
|
|
|
|
|
1998-09-15 06:33:23 +00:00
|
|
|
removed_entry = queue->queue_array[index];
|
|
|
|
if (queue->entries != index) {
|
|
|
|
queue->queue_array[index] = queue->queue_array[queue->entries];
|
|
|
|
queue->queue_array[index]->index = index;
|
1999-04-19 21:26:08 +00:00
|
|
|
heap_down(queue->queue_array, index, queue->entries - 1);
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
|
|
|
removed_entry->index = CAM_UNQUEUED_INDEX;
|
1999-04-19 21:26:08 +00:00
|
|
|
queue->entries--;
|
1998-09-15 06:33:23 +00:00
|
|
|
return (removed_entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* camq_change_priority: Given an array of cam_pinfo* elements with the
|
1999-04-19 21:26:08 +00:00
|
|
|
* Heap(1, num_entries) property, an index such that 1 <= index <= num_elements,
|
2002-12-30 21:18:15 +00:00
|
|
|
* and a new priority for the element at index, change the priority of
|
1998-09-15 06:33:23 +00:00
|
|
|
* element index and restore the Heap(1, num_elements) property.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
camq_change_priority(struct camq *queue, int index, u_int32_t new_priority)
|
|
|
|
{
|
|
|
|
if (new_priority > queue->queue_array[index]->priority) {
|
|
|
|
queue->queue_array[index]->priority = new_priority;
|
|
|
|
heap_down(queue->queue_array, index, queue->entries);
|
|
|
|
} else {
|
|
|
|
/* new_priority <= old_priority */
|
|
|
|
queue->queue_array[index]->priority = new_priority;
|
|
|
|
heap_up(queue->queue_array, index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct cam_devq *
|
|
|
|
cam_devq_alloc(int devices, int openings)
|
|
|
|
{
|
|
|
|
struct cam_devq *devq;
|
|
|
|
|
2005-07-01 15:21:30 +00:00
|
|
|
devq = (struct cam_devq *)malloc(sizeof(*devq), M_CAMDEVQ, M_NOWAIT);
|
1998-09-15 06:33:23 +00:00
|
|
|
if (devq == NULL) {
|
|
|
|
printf("cam_devq_alloc: - cannot malloc!\n");
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
if (cam_devq_init(devq, devices, openings) != 0) {
|
2005-07-01 15:21:30 +00:00
|
|
|
free(devq, M_CAMDEVQ);
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
return (NULL);
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
|
|
|
return (devq);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
cam_devq_init(struct cam_devq *devq, int devices, int openings)
|
|
|
|
{
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
|
1998-09-15 06:33:23 +00:00
|
|
|
bzero(devq, sizeof(*devq));
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
mtx_init(&devq->send_mtx, "CAM queue lock", NULL, MTX_DEF);
|
MFprojects/camlock r248890, r248897, r248898, r248900, r248903, r248905,
r248917, r248918, r248978, r249001, r249014, r249030:
Remove multilevel freezing mechanism, implemented to handle specifics of
the ATA/SATA error recovery, when post-reset recovery commands should be
allocated when queues are already full of payload requests. Instead of
removing frozen CCBs with specified range of priorities from the queue
to provide free openings, use simple hack, allowing explicit CCBs over-
allocation for requests with priority higher (numerically lower) then
CAM_PRIORITY_OOB threshold.
Simplify CCB allocation logic by removing SIM-level allocation queue.
After that SIM-level queue manages only CCBs execution, while allocation
logic is localized within each single device.
Suggested by: gibbs
2013-04-14 09:28:14 +00:00
|
|
|
if (camq_init(&devq->send_queue, devices) != 0)
|
1998-09-15 06:33:23 +00:00
|
|
|
return (1);
|
|
|
|
devq->send_openings = openings;
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
devq->send_active = 0;
|
|
|
|
return (0);
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
cam_devq_free(struct cam_devq *devq)
|
|
|
|
{
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
|
1999-08-16 02:23:42 +00:00
|
|
|
camq_fini(&devq->send_queue);
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
mtx_destroy(&devq->send_mtx);
|
2005-07-01 15:21:30 +00:00
|
|
|
free(devq, M_CAMDEVQ);
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
u_int32_t
|
|
|
|
cam_devq_resize(struct cam_devq *camq, int devices)
|
|
|
|
{
|
|
|
|
u_int32_t retval;
|
|
|
|
|
MFprojects/camlock r248890, r248897, r248898, r248900, r248903, r248905,
r248917, r248918, r248978, r249001, r249014, r249030:
Remove multilevel freezing mechanism, implemented to handle specifics of
the ATA/SATA error recovery, when post-reset recovery commands should be
allocated when queues are already full of payload requests. Instead of
removing frozen CCBs with specified range of priorities from the queue
to provide free openings, use simple hack, allowing explicit CCBs over-
allocation for requests with priority higher (numerically lower) then
CAM_PRIORITY_OOB threshold.
Simplify CCB allocation logic by removing SIM-level allocation queue.
After that SIM-level queue manages only CCBs execution, while allocation
logic is localized within each single device.
Suggested by: gibbs
2013-04-14 09:28:14 +00:00
|
|
|
retval = camq_resize(&camq->send_queue, devices);
|
1998-09-15 06:33:23 +00:00
|
|
|
return (retval);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct cam_ccbq *
|
|
|
|
cam_ccbq_alloc(int openings)
|
|
|
|
{
|
|
|
|
struct cam_ccbq *ccbq;
|
|
|
|
|
2005-07-01 15:21:30 +00:00
|
|
|
ccbq = (struct cam_ccbq *)malloc(sizeof(*ccbq), M_CAMCCBQ, M_NOWAIT);
|
1998-09-15 06:33:23 +00:00
|
|
|
if (ccbq == NULL) {
|
|
|
|
printf("cam_ccbq_alloc: - cannot malloc!\n");
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
if (cam_ccbq_init(ccbq, openings) != 0) {
|
2005-07-01 15:21:30 +00:00
|
|
|
free(ccbq, M_CAMCCBQ);
|
1998-09-15 06:33:23 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
2020-09-01 22:13:48 +00:00
|
|
|
|
1998-09-15 06:33:23 +00:00
|
|
|
return (ccbq);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
cam_ccbq_free(struct cam_ccbq *ccbq)
|
|
|
|
{
|
|
|
|
if (ccbq) {
|
2009-10-22 21:07:32 +00:00
|
|
|
cam_ccbq_fini(ccbq);
|
2005-07-01 15:21:30 +00:00
|
|
|
free(ccbq, M_CAMCCBQ);
|
1998-09-15 06:33:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
u_int32_t
|
|
|
|
cam_ccbq_resize(struct cam_ccbq *ccbq, int new_size)
|
|
|
|
{
|
|
|
|
int delta;
|
|
|
|
|
|
|
|
delta = new_size - (ccbq->dev_active + ccbq->dev_openings);
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
ccbq->total_openings += delta;
|
2013-08-05 11:48:40 +00:00
|
|
|
ccbq->dev_openings += delta;
|
1998-09-15 06:33:23 +00:00
|
|
|
|
2013-08-05 11:48:40 +00:00
|
|
|
new_size = imax(64, 1 << fls(new_size + new_size / 2));
|
|
|
|
if (new_size > ccbq->queue.array_size)
|
|
|
|
return (camq_resize(&ccbq->queue, new_size));
|
|
|
|
else
|
1998-09-15 06:33:23 +00:00
|
|
|
return (CAM_REQ_CMP);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
cam_ccbq_init(struct cam_ccbq *ccbq, int openings)
|
|
|
|
{
|
|
|
|
bzero(ccbq, sizeof(*ccbq));
|
2013-08-05 11:48:40 +00:00
|
|
|
if (camq_init(&ccbq->queue,
|
|
|
|
imax(64, 1 << fls(openings + openings / 2))) != 0)
|
1998-09-15 06:33:23 +00:00
|
|
|
return (1);
|
Merge CAM locking changes from the projects/camlock branch to radically
reduce lock congestion and improve SMP scalability of the SCSI/ATA stack,
preparing the ground for the coming next GEOM direct dispatch support.
Replace big per-SIM locks with bunch of smaller ones:
- per-LUN locks to protect device and peripheral drivers state;
- per-target locks to protect list of LUNs on target;
- per-bus locks to protect reference counting;
- per-send queue locks to protect queue of CCBs to be sent;
- per-done queue locks to protect queue of completed CCBs;
- remaining per-SIM locks now protect only HBA driver internals.
While holding LUN lock it is allowed (while not recommended for performance
reasons) to take SIM lock. The opposite acquisition order is forbidden.
All the other locks are leaf locks, that can be taken anywhere, but should
not be cascaded. Many functions, such as: xpt_action(), xpt_done(),
xpt_async(), xpt_create_path(), etc. are no longer require (but allow) SIM
lock to be held.
To keep compatibility and solve cases where SIM lock can't be dropped, all
xpt_async() calls in addition to xpt_done() calls are queued to completion
threads for async processing in clean environment without SIM lock held.
Instead of single CAM SWI thread, used for commands completion processing
before, use multiple (depending on number of CPUs) threads. Load balanced
between them using "hash" of the device B:T:L address.
HBA drivers that can drop SIM lock during completion processing and have
sufficient number of completion threads to efficiently scale to multiple
CPUs can use new function xpt_done_direct() to avoid extra context switch.
Make ahci(4) driver to use this mechanism depending on hardware setup.
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-21 12:00:26 +00:00
|
|
|
ccbq->total_openings = openings;
|
MFprojects/camlock r248890, r248897, r248898, r248900, r248903, r248905,
r248917, r248918, r248978, r249001, r249014, r249030:
Remove multilevel freezing mechanism, implemented to handle specifics of
the ATA/SATA error recovery, when post-reset recovery commands should be
allocated when queues are already full of payload requests. Instead of
removing frozen CCBs with specified range of priorities from the queue
to provide free openings, use simple hack, allowing explicit CCBs over-
allocation for requests with priority higher (numerically lower) then
CAM_PRIORITY_OOB threshold.
Simplify CCB allocation logic by removing SIM-level allocation queue.
After that SIM-level queue manages only CCBs execution, while allocation
logic is localized within each single device.
Suggested by: gibbs
2013-04-14 09:28:14 +00:00
|
|
|
ccbq->dev_openings = openings;
|
1998-09-15 06:33:23 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2009-10-22 21:07:32 +00:00
|
|
|
void
|
|
|
|
cam_ccbq_fini(struct cam_ccbq *ccbq)
|
|
|
|
{
|
|
|
|
|
|
|
|
camq_fini(&ccbq->queue);
|
|
|
|
}
|
|
|
|
|
1998-09-15 06:33:23 +00:00
|
|
|
/*
|
|
|
|
* Heap routines for manipulating CAM queues.
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* queue_cmp: Given an array of cam_pinfo* elements and indexes i
|
|
|
|
* and j, return less than 0, 0, or greater than 0 if i is less than,
|
|
|
|
* equal to, or greater than j respectively.
|
|
|
|
*/
|
|
|
|
static __inline int
|
|
|
|
queue_cmp(cam_pinfo **queue_array, int i, int j)
|
|
|
|
{
|
|
|
|
if (queue_array[i]->priority == queue_array[j]->priority)
|
|
|
|
return ( queue_array[i]->generation
|
|
|
|
- queue_array[j]->generation );
|
|
|
|
else
|
|
|
|
return ( queue_array[i]->priority
|
|
|
|
- queue_array[j]->priority );
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* swap: Given an array of cam_pinfo* elements and indexes i and j,
|
|
|
|
* exchange elements i and j.
|
|
|
|
*/
|
|
|
|
static __inline void
|
|
|
|
swap(cam_pinfo **queue_array, int i, int j)
|
|
|
|
{
|
|
|
|
cam_pinfo *temp_qentry;
|
|
|
|
|
|
|
|
temp_qentry = queue_array[j];
|
|
|
|
queue_array[j] = queue_array[i];
|
|
|
|
queue_array[i] = temp_qentry;
|
|
|
|
queue_array[j]->index = j;
|
|
|
|
queue_array[i]->index = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* heap_up: Given an array of cam_pinfo* elements with the
|
1999-04-19 21:26:08 +00:00
|
|
|
* Heap(1, new_index-1) property and a new element in location
|
|
|
|
* new_index, output Heap(1, new_index).
|
1998-09-15 06:33:23 +00:00
|
|
|
*/
|
|
|
|
static void
|
|
|
|
heap_up(cam_pinfo **queue_array, int new_index)
|
|
|
|
{
|
|
|
|
int child;
|
|
|
|
int parent;
|
|
|
|
|
|
|
|
child = new_index;
|
|
|
|
|
1999-04-19 21:26:08 +00:00
|
|
|
while (child != 1) {
|
|
|
|
parent = child >> 1;
|
1998-09-15 06:33:23 +00:00
|
|
|
if (queue_cmp(queue_array, parent, child) <= 0)
|
|
|
|
break;
|
|
|
|
swap(queue_array, parent, child);
|
|
|
|
child = parent;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* heap_down: Given an array of cam_pinfo* elements with the
|
1999-04-19 21:26:08 +00:00
|
|
|
* Heap(index + 1, num_entries) property with index containing
|
|
|
|
* an unsorted entry, output Heap(index, num_entries).
|
1998-09-15 06:33:23 +00:00
|
|
|
*/
|
|
|
|
static void
|
|
|
|
heap_down(cam_pinfo **queue_array, int index, int num_entries)
|
|
|
|
{
|
|
|
|
int child;
|
|
|
|
int parent;
|
2020-09-01 22:13:48 +00:00
|
|
|
|
1998-09-15 06:33:23 +00:00
|
|
|
parent = index;
|
1999-04-19 21:26:08 +00:00
|
|
|
child = parent << 1;
|
|
|
|
for (; child <= num_entries; child = parent << 1) {
|
|
|
|
if (child < num_entries) {
|
1998-09-15 06:33:23 +00:00
|
|
|
/* child+1 is the right child of parent */
|
|
|
|
if (queue_cmp(queue_array, child + 1, child) < 0)
|
|
|
|
child++;
|
|
|
|
}
|
|
|
|
/* child is now the least child of parent */
|
|
|
|
if (queue_cmp(queue_array, parent, child) <= 0)
|
|
|
|
break;
|
|
|
|
swap(queue_array, child, parent);
|
|
|
|
parent = child;
|
|
|
|
}
|
|
|
|
}
|