freebsd-skq/sys/dev/raidframe/rf_paritylog.c
Scott Long f9d186edc8 After much delay and anticipation, welcome RAIDFrame into the FreeBSD
world.  This should be considered highly experimental.

Approved-by:	re
2002-10-20 08:17:39 +00:00

870 lines
28 KiB
C

/* $FreeBSD$ */
/* $NetBSD: rf_paritylog.c,v 1.5 2000/01/07 03:41:01 oster Exp $ */
/*
* Copyright (c) 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: William V. Courtright II
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/* Code for manipulating in-core parity logs
*
*/
#include <dev/raidframe/rf_archs.h>
#if RF_INCLUDE_PARITYLOGGING > 0
/*
* Append-only log for recording parity "update" and "overwrite" records
*/
#include <dev/raidframe/rf_types.h>
#include <dev/raidframe/rf_threadstuff.h>
#include <dev/raidframe/rf_mcpair.h>
#include <dev/raidframe/rf_raid.h>
#include <dev/raidframe/rf_dag.h>
#include <dev/raidframe/rf_dagfuncs.h>
#include <dev/raidframe/rf_desc.h>
#include <dev/raidframe/rf_layout.h>
#include <dev/raidframe/rf_diskqueue.h>
#include <dev/raidframe/rf_etimer.h>
#include <dev/raidframe/rf_paritylog.h>
#include <dev/raidframe/rf_general.h>
#include <dev/raidframe/rf_map.h>
#include <dev/raidframe/rf_paritylogging.h>
#include <dev/raidframe/rf_paritylogDiskMgr.h>
/*
 * Return a struct for holding common parity log information.  The struct is
 * taken from the free list (parityLogDiskQueue.freeCommonList) when that list
 * is non-empty; otherwise a new one is created with RF_Malloc and its mutex
 * is initialized.  A recycled struct is handed back without re-initializing
 * its mutex.
 *
 * Returns the struct with ->next cleared, or NULL if RF_Malloc yields NULL or
 * the mutex cannot be initialized.
 *
 * NON-BLOCKING
 */
static RF_CommonLogData_t *
AllocParityLogCommonData(RF_Raid_t * raidPtr)
{
	RF_CommonLogData_t *common = NULL;
	int rc;

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	if (raidPtr->parityLogDiskQueue.freeCommonList) {
		/* pop the first entry off the free list */
		common = raidPtr->parityLogDiskQueue.freeCommonList;
		raidPtr->parityLogDiskQueue.freeCommonList = common->next;
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	} else {
		/* free list is empty: drop the lock before allocating */
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
		RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
		if (common == NULL)
			return (NULL);
		rc = rf_mutex_init(&common->mutex, __FUNCTION__);
		if (rc) {
			RF_ERRORMSG3("Unable to init mutex file %s line %d rc=%d\n", __FILE__,
			    __LINE__, rc);
			RF_Free(common, sizeof(RF_CommonLogData_t));
			/* must not fall through: common is gone */
			return (NULL);
		}
	}
	common->next = NULL;
	return (common);
}
/*
 * Give a single common parity log struct back to the array it belongs to by
 * pushing it onto the front of parityLogDiskQueue.freeCommonList.  The owning
 * array is read from common->raidPtr.
 *
 * NON-BLOCKING
 */
static void
FreeParityLogCommonData(RF_CommonLogData_t * common)
{
	RF_Raid_t *owner = common->raidPtr;

	RF_LOCK_MUTEX(owner->parityLogDiskQueue.mutex);
	common->next = owner->parityLogDiskQueue.freeCommonList;
	owner->parityLogDiskQueue.freeCommonList = common;
	RF_UNLOCK_MUTEX(owner->parityLogDiskQueue.mutex);
}
/*
 * Return a struct for holding parity log information.  The struct is taken
 * from the free list (parityLogDiskQueue.freeDataList) when that list is
 * non-empty; otherwise a new one is created with RF_Malloc.
 *
 * Returns the struct with ->next and ->prev cleared, or NULL if RF_Malloc
 * yields NULL.
 *
 * NON-BLOCKING
 */
static RF_ParityLogData_t *
AllocParityLogData(RF_Raid_t * raidPtr)
{
	RF_ParityLogData_t *data = NULL;

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	if (raidPtr->parityLogDiskQueue.freeDataList) {
		/* pop the first entry off the free list */
		data = raidPtr->parityLogDiskQueue.freeDataList;
		raidPtr->parityLogDiskQueue.freeDataList = data->next;
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	} else {
		/* free list is empty: drop the lock before allocating */
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
		RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
		/* let the caller see the failure instead of faulting here */
		if (data == NULL)
			return (NULL);
	}
	data->next = NULL;
	data->prev = NULL;
	return (data);
}
/*
 * Return a ->next-linked chain of parity log data structs to the free list
 * (parityLogDiskQueue.freeDataList).  Each element is pushed onto the front
 * of the free list in turn.  The owning array is read from the first
 * element's common struct, so data must be non-NULL.
 *
 * NON-BLOCKING
 */
static void
FreeParityLogData(RF_ParityLogData_t * data)
{
	RF_Raid_t *raidPtr = data->common->raidPtr;
	RF_ParityLogData_t *cur, *following;

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	for (cur = data; cur != NULL; cur = following) {
		/* remember the successor before ->next is overwritten */
		following = cur->next;
		cur->next = raidPtr->parityLogDiskQueue.freeDataList;
		raidPtr->parityLogDiskQueue.freeDataList = cur;
	}
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
}
/*
 * Push an unlinked in-core parity log item (data) onto the head of the
 * doubly-linked queue described by (*head, *tail).  The queue is modified
 * under parityLogDiskQueue.mutex of the array that owns data.
 *
 * NON-BLOCKING
 */
static void
EnqueueParityLogData(
    RF_ParityLogData_t * data,
    RF_ParityLogData_t ** head,
    RF_ParityLogData_t ** tail)
{
	RF_Raid_t *raidPtr = data->common->raidPtr;

	if (rf_parityLogDebug) {
		printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
	}
	/* the item must not already be linked into a list */
	RF_ASSERT(data->prev == NULL);
	RF_ASSERT(data->next == NULL);

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	if (*head == NULL) {
		/* queue is empty: data becomes both ends */
		RF_ASSERT(*head == NULL);
		RF_ASSERT(*tail == NULL);
		*head = data;
		*tail = data;
	} else {
		/* link data in front of the current head */
		RF_ASSERT((*head)->prev == NULL);
		RF_ASSERT((*tail)->next == NULL);
		data->next = *head;
		(*head)->prev = data;
		*head = data;
	}
	RF_ASSERT((*head)->prev == NULL);
	RF_ASSERT((*tail)->next == NULL);
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
}
/*
 * Detach and return the item at the tail of the queue (*head, *tail), or
 * return NULL if the queue is empty.  Enqueue inserts at the head, so taking
 * from the tail preserves FIFO order.  The returned item has ->next and
 * ->prev cleared.
 *
 * When ignoreLocks is non-zero parityLogDiskQueue.mutex is neither taken nor
 * released here.
 *
 * NON-BLOCKING
 */
static RF_ParityLogData_t *
DequeueParityLogData(
    RF_Raid_t * raidPtr,
    RF_ParityLogData_t ** head,
    RF_ParityLogData_t ** tail,
    int ignoreLocks)
{
	RF_ParityLogData_t *last;

	if (!ignoreLocks)
		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	last = *tail;
	if (last != NULL) {
		if (*head != last) {
			/* more than one item: predecessor becomes the tail */
			*tail = last->prev;
			(*tail)->next = NULL;
			RF_ASSERT((*head)->prev == NULL);
			RF_ASSERT((*tail)->next == NULL);
		} else {
			/* that was the only item in the queue */
			*head = NULL;
			*tail = NULL;
		}
		last->next = NULL;
		last->prev = NULL;
		if (rf_parityLogDebug) {
			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", last->regionID, (int) last->diskAddress.raidAddress, (int) last->diskAddress.numSector);
		}
	}
	if (*head != NULL) {
		RF_ASSERT((*head)->prev == NULL);
		RF_ASSERT((*tail)->next == NULL);
	}

	if (!ignoreLocks)
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	return (last);
}
/*
 * Append an in-core parity log item (data) to the tail of the doubly-linked
 * queue described by (*head, *tail).  DequeueParityLogData removes from the
 * tail, so a requeued item is the next one to be dequeued.  The queue is
 * modified under parityLogDiskQueue.mutex of the array that owns data.
 *
 * NON-BLOCKING
 */
static void
RequeueParityLogData(
    RF_ParityLogData_t * data,
    RF_ParityLogData_t ** head,
    RF_ParityLogData_t ** tail)
{
	RF_Raid_t *raidPtr;

	/* check the argument before it is dereferenced, not after */
	RF_ASSERT(data);
	raidPtr = data->common->raidPtr;
	if (rf_parityLogDebug)
		printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	if (*tail) {
		/* append to tail of list */
		data->prev = *tail;
		data->next = NULL;
		(*tail)->next = data;
		*tail = data;
	} else {
		/* inserting into an empty list */
		*head = data;
		*tail = data;
		(*head)->prev = NULL;
		(*tail)->next = NULL;
	}
	RF_ASSERT((*head)->prev == NULL);
	RF_ASSERT((*tail)->next == NULL);
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
}
/*
 * Build the list of items to be logged for one request.
 *
 * One common struct is allocated and filled in from the arguments; every
 * item created points at it and common->cnt counts the items.  The pda chain
 * is walked and one item is produced per physical disk address; an address
 * whose first and last sectors map to different parity log regions is cut at
 * the region boundary so that each item lies in a single region.
 * bufOffset of each item is the running sector offset into bufPtr.
 *
 * Note that cutting an address advances startSector and shrinks numSector of
 * the caller's pda entry in place.  Each new item is pushed on the head of
 * the result list, and the head of that list is returned (NULL if pda is
 * NULL).
 *
 * NON-BLOCKING
 */
RF_ParityLogData_t *
rf_CreateParityLogData(
    RF_ParityRecordType_t operation,
    RF_PhysDiskAddr_t * pda,
    caddr_t bufPtr,
    RF_Raid_t * raidPtr,
    int (*wakeFunc) (RF_DagNode_t * node, int status),
    void *wakeArg,
    RF_AccTraceEntry_t * tracerec,
    RF_Etimer_t startTime)
{
	RF_ParityLogData_t *entry, *listHead = NULL, *listTail = NULL;
	RF_CommonLogData_t *shared;
	RF_PhysDiskAddr_t *cur = pda;
	int split, bufSectors = 0;

	shared = AllocParityLogCommonData(raidPtr);
	RF_ASSERT(shared);
	shared->operation = operation;
	shared->bufPtr = bufPtr;
	shared->raidPtr = raidPtr;
	shared->wakeFunc = wakeFunc;
	shared->wakeArg = wakeArg;
	shared->tracerec = tracerec;
	shared->startTime = startTime;
	shared->cnt = 0;

	if (rf_parityLogDebug) {
		printf("[entering CreateParityLogData]\n");
	}
	while (cur != NULL) {
		shared->cnt++;
		entry = AllocParityLogData(raidPtr);
		RF_ASSERT(entry);
		entry->common = shared;
		entry->next = NULL;
		entry->prev = NULL;
		entry->regionID = rf_MapRegionIDParityLogging(raidPtr, cur->startSector);
		if (entry->regionID != rf_MapRegionIDParityLogging(raidPtr, cur->startSector + cur->numSector - 1)) {
			/*
			 * The address straddles a region boundary: count the
			 * leading sectors that still belong to the first
			 * region.
			 */
			for (split = 0;
			    entry->regionID == rf_MapRegionIDParityLogging(raidPtr, cur->startSector + split);
			    split++)
				continue;
			/* log only the part in front of the boundary */
			entry->diskAddress = *cur;
			entry->diskAddress.numSector = split;
			entry->bufOffset = bufSectors;
			bufSectors += split;
			EnqueueParityLogData(entry, &listHead, &listTail);
			/* leave the remainder for the next pass */
			cur->startSector += split;
			cur->numSector -= split;
		} else {
			/* whole address lies inside one region */
			entry->diskAddress = *cur;
			entry->bufOffset = bufSectors;
			bufSectors += cur->numSector;
			EnqueueParityLogData(entry, &listHead, &listTail);
			/* move on to the next address in the chain */
			cur = cur->next;
		}
	}
	if (rf_parityLogDebug) {
		printf("[leaving CreateParityLogData]\n");
	}
	return (listHead);
}
/*
 * Remove and return an in-core parity log item belonging to the given region
 * (regionID) from the queue (*head, *tail).  The queue is searched backward
 * from the tail, so the match closest to the tail is the one returned.  The
 * returned item has ->next and ->prev cleared.  Returns NULL when no item
 * matches.
 *
 * When ignoreLocks is zero, parityLogDiskQueue.mutex is taken for the
 * duration of the search and is released on every return path, including the
 * one on which a match is found.  When ignoreLocks is non-zero the mutex is
 * not touched here.
 *
 * NON-BLOCKING.
 */
RF_ParityLogData_t *
rf_SearchAndDequeueParityLogData(
    RF_Raid_t * raidPtr,
    int regionID,
    RF_ParityLogData_t ** head,
    RF_ParityLogData_t ** tail,
    int ignoreLocks)
{
	RF_ParityLogData_t *w;

	if (!ignoreLocks)
		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	/* walk backward through the list, looking for a matching region ID */
	for (w = *tail; w != NULL; w = w->prev)
		if (w->regionID == regionID)
			break;

	if (w != NULL) {
		/* unlink the matching element from the list */
		if (w == *tail) {
			if (*head == *tail) {
				/* removing only element in the list */
				*head = NULL;
				*tail = NULL;
			} else {
				/* removing last item in the list */
				*tail = w->prev;
				(*tail)->next = NULL;
			}
		} else if (w == *head) {
			/* removing first item in the list */
			*head = w->next;
			(*head)->prev = NULL;
		} else {
			/* removing an item from the middle of the list */
			w->prev->next = w->next;
			w->next->prev = w->prev;
		}
		if (*head != NULL) {
			RF_ASSERT((*head)->prev == NULL);
			RF_ASSERT((*tail)->next == NULL);
		}
		w->prev = NULL;
		w->next = NULL;
		if (rf_parityLogDebug)
			printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
	}

	/* single exit: the mutex is dropped whether or not a match was found */
	if (!ignoreLocks)
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	return (w);
}
/*
 * Take the item at the tail of the queue (*head, *tail), then pull every
 * other item with the same regionID out of the queue as well.  The items are
 * returned chained through ->next, tail item first; NULL is returned if the
 * queue was empty.
 *
 * Both dequeue helpers are called with ignoreLocks set to RF_TRUE, so no
 * mutex is taken here.
 *
 * NON-BLOCKING
 */
static RF_ParityLogData_t *
DequeueMatchingLogData(
    RF_Raid_t * raidPtr,
    RF_ParityLogData_t ** head,
    RF_ParityLogData_t ** tail)
{
	RF_ParityLogData_t *first, *last;
	int wanted;

	first = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
	if (first == NULL)
		return (NULL);

	wanted = first->regionID;
	last = first;
	for (;;) {
		/* hang the next match, if any, off the end of the chain */
		last->next = rf_SearchAndDequeueParityLogData(raidPtr, wanted, head, tail, RF_TRUE);
		if (last->next == NULL)
			break;
		last = last->next;
	}
	return (first);
}
/*
 * Take a log buffer from the pool (parityLogPool.parityLogs) and return it,
 * reset for use by logData's region.  If the pool is empty, logData is placed
 * on the queue of work waiting for log buffers (logBlockHead/logBlockTail)
 * and NULL is returned; finish selects RequeueParityLogData (non-zero) or
 * EnqueueParityLogData (zero) for that.
 *
 * NON-BLOCKING
 */
static RF_ParityLog_t *
AcquireParityLog(
    RF_ParityLogData_t * logData,
    int finish)
{
	RF_Raid_t *raidPtr = logData->common->raidPtr;
	RF_ParityLog_t *log;

	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
	log = raidPtr->parityLogPool.parityLogs;
	if (log == NULL) {
		/*
		 * Pool is empty.  The work is queued while
		 * parityLogPool.mutex is still held, to ensure
		 * synchronization with ReleaseParityLogs.
		 */
		if (rf_parityLogDebug) {
			printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
		}
		if (!finish) {
			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
		} else {
			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
		}
	} else {
		/* unhook the first pool entry and hand it out empty */
		raidPtr->parityLogPool.parityLogs = log->next;
		log->regionID = logData->regionID;
		log->numRecords = 0;
		log->next = NULL;
		raidPtr->logsInUse++;
		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
	}
	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
	return (log);
}
/*
 * Return a linked list of parity logs (firstLog) to the free pool
 * (parityLogPool.parityLogs).
 *
 * Before anything goes back to the pool, requests that are blocked waiting
 * for a log buffer (logBlockHead/logBlockTail) are serviced: each group of
 * blocked items for one region is passed to rf_ParityLogAppend together with
 * one of the released logs.  Whatever logs remain afterwards are put at the
 * front of the pool and logsInUse is decremented once per returned log.
 *
 * Holding the mutexes for parityLogPool and parityLogDiskQueue forces
 * synchronization with AcquireParityLog(); both are dropped around the call
 * to rf_ParityLogAppend.
 *
 * A NULL firstLog trips the assertion; if execution continues past it the
 * function returns without doing anything.
 *
 * NON-BLOCKING.
 */
void
rf_ReleaseParityLogs(
    RF_Raid_t * raidPtr,
    RF_ParityLog_t * firstLog)
{
	RF_ParityLogData_t *logDataList;
	RF_ParityLog_t *log, *lastLog;
	int cnt, regionID;

	RF_ASSERT(firstLog);
	if (firstLog == NULL)
		return;

	RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
	/* detach the first released log and reset it */
	log = firstLog;
	firstLog = firstLog->next;
	log->numRecords = 0;
	log->next = NULL;
	while (logDataList && log) {
		/*
		 * rf_ParityLogAppend consumes logDataList (its items are
		 * freed or queued elsewhere), so read what the debug message
		 * needs before handing the list over.
		 */
		regionID = logDataList->regionID;
		RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
		rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
		if (rf_parityLogDebug)
			printf("[finishing up buf-blocked log data, region %d]\n", regionID);
		if (log == NULL) {
			/* the log was taken; detach the next released one */
			log = firstLog;
			if (firstLog) {
				firstLog = firstLog->next;
				log->numRecords = 0;
				log->next = NULL;
			}
		}
		RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
		if (log)
			logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
	}
	/* return remaining logs to pool */
	if (log) {
		log->next = firstLog;
		firstLog = log;
	}
	if (firstLog) {
		lastLog = firstLog;
		raidPtr->logsInUse--;
		RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
		while (lastLog->next) {
			lastLog = lastLog->next;
			raidPtr->logsInUse--;
			RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
		}
		lastLog->next = raidPtr->parityLogPool.parityLogs;
		raidPtr->parityLogPool.parityLogs = firstLog;
		/* sanity check: pooled logs plus logs in use must add up */
		cnt = 0;
		log = raidPtr->parityLogPool.parityLogs;
		while (log) {
			cnt++;
			log = log->next;
		}
		RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
	}
	RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
}
/*
 * Hand an in-core parity log (log) to the reintegration work list by pushing
 * it onto the front of parityLogDiskQueue.reintQueue, and mark the region
 * (regionID) as having reintegration in progress.  parityLogDiskQueue.cond is
 * signalled once both mutexes have been released.
 *
 * NON-BLOCKING
 */
static void
ReintLog(
    RF_Raid_t * raidPtr,
    int regionID,
    RF_ParityLog_t * log)
{
	RF_ASSERT(log);

	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
	/* the flag is cleared again when reintegration completes */
	raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;
	if (rf_parityLogDebug) {
		printf("[requesting reintegration of region %d]\n", log->regionID);
	}

	/* put the log on the reintegration queue */
	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	log->next = raidPtr->parityLogDiskQueue.reintQueue;
	raidPtr->parityLogDiskQueue.reintQueue = log;
	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
}
/*
 * Queue a full, unlinked core log (log) for writing to disk by pushing it
 * onto the front of parityLogDiskQueue.flushQueue, then signal
 * parityLogDiskQueue.cond.
 *
 * NON-BLOCKING
 */
static void
FlushLog(
    RF_Raid_t * raidPtr,
    RF_ParityLog_t * log)
{
	/* only a completely filled log that is on no list may be flushed */
	RF_ASSERT(log);
	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
	RF_ASSERT(log->next == NULL);

	RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	log->next = raidPtr->parityLogDiskQueue.flushQueue;
	raidPtr->parityLogDiskQueue.flushQueue = log;
	RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);

	RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
}
/*
 * Move the full core log of logData's region toward disk.
 *
 * If the region is currently being reintegrated, logData is queued on
 * reintBlockHead/reintBlockTail and (1) is returned: the work is blocked and
 * the caller should relinquish control of the thread.  The core log is left
 * in place in that case.
 *
 * Otherwise the core log is detached from the region.  If the region's log
 * disk still has room, the region's disk map is updated and the log is passed
 * to FlushLog; if the log disk is full, the log is passed to ReintLog to
 * request reintegration.  (0) is returned in both cases.
 *
 * Caller must hold regionInfo[regionID].mutex
 *
 * NON-BLOCKING
 */
static int
DumpParityLogToDisk(
    int finish,
    RF_ParityLogData_t * logData)
{
	int regionID = logData->regionID;
	RF_Raid_t *raidPtr = logData->common->raidPtr;
	RF_ParityLog_t *log;
	int i, slot, diskCount;

	if (rf_parityLogDebug) {
		printf("[dumping parity log to disk, region %d]\n", regionID);
	}
	log = raidPtr->regionInfo[regionID].coreLog;
	RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
	RF_ASSERT(log->next == NULL);

	/* while reintegration is in progress the work has to be queued */
	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
	if (raidPtr->regionInfo[regionID].reintInProgress) {
		/*
		 * This region is being reintegrated.  Blocking is not
		 * allowed, so queue the remaining work and return.
		 */
		if (rf_parityLogDebug) {
			printf("[region %d waiting on reintegration]\n", regionID);
		}
		/*
		 * XXX not sure about the use of finish - shouldn't this
		 * always be "Enqueue"?
		 */
		if (!finish) {
			EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
		} else {
			RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
		}
		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
		return (1);	/* relinquish control of this thread */
	}
	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);

	raidPtr->regionInfo[regionID].coreLog = NULL;
	if ((raidPtr->regionInfo[regionID].diskCount) >= raidPtr->regionInfo[regionID].capacity) {
		/*
		 * No room for the log on disk: send it to the disk manager
		 * and request reintegration.
		 */
		RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
		ReintLog(raidPtr, regionID, log);
	} else {
		/*
		 * IMPORTANT!! the loop bound below assumes the region disk
		 * holds an integral number of core logs.
		 */
		diskCount = raidPtr->regionInfo[regionID].diskCount;
		/* record every log entry in the disk map of this region */
		i = 0;
		while (i < raidPtr->numSectorsPerLog) {
			slot = i + diskCount;
			raidPtr->regionInfo[regionID].diskMap[slot].operation = log->records[i].operation;
			raidPtr->regionInfo[regionID].diskMap[slot].parityAddr = log->records[i].parityAddr;
			i++;
		}
		log->diskOffset = diskCount;
		raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
		FlushLog(raidPtr, log);
	}
	if (rf_parityLogDebug) {
		printf("[finished dumping parity log to disk, region %d]\n", regionID);
	}
	return (0);
}
/*
 * Choose the core log to install for a region that currently needs one.
 *
 * If the caller of rf_ParityLogAppend supplied a log (incomingLog and
 * *incomingLog both non-NULL) that log is used: it is stamped with regionID
 * and *incomingLog is cleared so it cannot be handed out twice.  Otherwise
 * the log comes from AcquireParityLog, which either returns a log or queues
 * item and returns NULL.
 */
static RF_ParityLog_t *
GetCoreLogForItem(
    RF_ParityLogData_t * item,
    int finish,
    RF_ParityLog_t ** incomingLog,
    int regionID)
{
	RF_ParityLog_t *log;

	if (incomingLog != NULL && *incomingLog != NULL) {
		log = *incomingLog;
		RF_ASSERT(log->next == NULL);
		log->regionID = regionID;
		*incomingLog = NULL;
		return (log);
	}
	return (AcquireParityLog(item, finish));
}

/*
 * Add parity to the appropriate log, one sector at a time.  This routine is
 * called by dag functions ParityLogUpdateFunc and ParityLogOverwriteFunc and
 * therefore MUST BE NONBLOCKING.
 *
 * Parity to be logged is contained in a linked-list (logData).  When this
 * routine returns, every sector in the list will be in one of three places:
 * 1) entered into the parity log 2) queued, waiting on reintegration
 * 3) queued, waiting on a core log
 *
 * Blocked work is passed to the ParityLoggingDiskManager for completion.
 * Later, as conditions which required the block are removed, the work
 * reenters this routine with the "finish" parameter set to "RF_TRUE."
 *
 * Parameters:
 *   logData        - non-NULL list of items to log, chained through ->next
 *   finish         - passed on to AcquireParityLog / DumpParityLogToDisk,
 *                    where it selects requeue (non-zero) or enqueue (zero)
 *                    for work that has to be blocked
 *   incomingLog    - optional; if it points at a non-NULL log, that log is
 *                    used the first time a region needs a core log and
 *                    *incomingLog is set to NULL
 *   clearReintFlag - if non-zero, the first item's region has its diskCount
 *                    reset and its reintInProgress flag cleared before any
 *                    item is processed
 *
 * When the last outstanding item of a request has been logged
 * (common->cnt reaches 0), the elapsed time is added to the trace record,
 * the item and its common struct are returned to their free lists and the
 * request's wakeFunc is called with (wakeArg, 0).
 *
 * Always returns 0.
 *
 * NON-BLOCKING
 */
int
rf_ParityLogAppend(
    RF_ParityLogData_t * logData,
    int finish,
    RF_ParityLog_t ** incomingLog,
    int clearReintFlag)
{
	int regionID, logItem, itemDone;
	RF_ParityLogData_t *item;
	RF_CommonLogData_t *common;
	int punt, done = RF_FALSE;
	RF_ParityLog_t *log;
	RF_Raid_t *raidPtr;
	RF_Etimer_t timer;
	int (*wakeFunc) (RF_DagNode_t * node, int status);
	void *wakeArg;

	/* check the argument before it is dereferenced, not after */
	RF_ASSERT(logData != NULL);
	raidPtr = logData->common->raidPtr;
	/* lock the region for the first item in logData */
	regionID = logData->regionID;
	RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
	RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
	if (clearReintFlag) {
		/* Enable flushing for this region.  Holding both locks
		 * provides a synchronization barrier with DumpParityLogToDisk */
		RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
		RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
		RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
		raidPtr->regionInfo[regionID].diskCount = 0;
		raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
		/* flushing is now enabled */
		RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
		RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
	}
	/* process each item in logData */
	while (logData) {
		/* remove an item from logData */
		item = logData;
		logData = logData->next;
		item->next = NULL;
		item->prev = NULL;
		if (rf_parityLogDebug)
			printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
		/* see if we moved to a new region */
		if (regionID != item->regionID) {
			RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
			regionID = item->regionID;
			RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
			RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
		}
		/*
		 * punt is set to RF_TRUE if work is blocked.  This can happen
		 * in one of two ways: 1) no core log (AcquireParityLog)
		 * 2) waiting on reintegration (DumpParityLogToDisk).  If punt
		 * is RF_TRUE, the item was queued, so skip to the next item.
		 */
		punt = RF_FALSE;
		/*
		 * An item must carry at least one sector.  Should execution
		 * continue past a failed assertion, treat an empty item as
		 * already complete rather than entering the copy loop with
		 * nothing to copy.
		 */
		RF_ASSERT(item->diskAddress.numSector > 0);
		done = (item->diskAddress.numSector > 0) ? RF_FALSE : RF_TRUE;
		/* process item, one sector at a time, until all sectors
		 * processed or we punt */
		while (!punt && !done) {
			/* verify that a core log exists for this region */
			if (!raidPtr->regionInfo[regionID].coreLog) {
				/* Attempt to acquire a parity log.  If
				 * acquisition fails, the item has been queued
				 * and we move to the next item. */
				raidPtr->regionInfo[regionID].coreLog = GetCoreLogForItem(item, finish, incomingLog, regionID);
			}
			if (!raidPtr->regionInfo[regionID].coreLog)
				punt = RF_TRUE;	/* failed to find a core log */
			else {
				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
				/* if log is full, dump it to disk and grab a
				 * new log */
				if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
					if (DumpParityLogToDisk(finish, item))
						/* dump unsuccessful, blocked
						 * on reintegration */
						punt = RF_TRUE;
					else {
						/* dump was successful; the
						 * region needs a fresh log */
						raidPtr->regionInfo[regionID].coreLog = GetCoreLogForItem(item, finish, incomingLog, regionID);
						/* if a core log is not
						 * available, the work was
						 * queued */
						if (!raidPtr->regionInfo[regionID].coreLog)
							/* blocked on log
							 * availability */
							punt = RF_TRUE;
					}
				}
			}
			/* if we didn't punt on this item, attempt to add a
			 * sector to the core log */
			if (!punt) {
				RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
				/* at this point, we have a core log with
				 * enough room for a sector */
				/* copy a sector into the log */
				log = raidPtr->regionInfo[regionID].coreLog;
				RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
				logItem = log->numRecords++;
				log->records[logItem].parityAddr = item->diskAddress;
				RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
				RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
				log->records[logItem].parityAddr.numSector = 1;
				log->records[logItem].operation = item->common->operation;
				bcopy((item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (1 << item->common->raidPtr->logBytesPerSector));
				item->diskAddress.numSector--;
				item->diskAddress.startSector++;
				if (item->diskAddress.numSector == 0)
					done = RF_TRUE;
			}
		}
		if (!punt) {
			/* Processed this item completely, decrement count of
			 * items to be processed. */
			RF_ASSERT(item->diskAddress.numSector == 0);
			RF_LOCK_MUTEX(item->common->mutex);
			item->common->cnt--;
			if (item->common->cnt == 0)
				itemDone = RF_TRUE;
			else
				itemDone = RF_FALSE;
			RF_UNLOCK_MUTEX(item->common->mutex);
			if (itemDone) {
				/* Finished processing all log data for this
				 * IO.  Return structs to free list and invoke
				 * wakeup function. */
				/* grab initial value of timer */
				timer = item->common->startTime;
				RF_ETIMER_STOP(timer);
				RF_ETIMER_EVAL(timer);
				item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
				if (rf_parityLogDebug)
					printf("[waking process for region %d]\n", item->regionID);
				wakeFunc = item->common->wakeFunc;
				wakeArg = item->common->wakeArg;
				/*
				 * FreeParityLogData reads item->common, so
				 * the item is released while its common
				 * struct is still ours; the common struct
				 * goes back to its free list afterwards.
				 */
				common = item->common;
				FreeParityLogData(item);
				FreeParityLogCommonData(common);
				(wakeFunc) (wakeArg, 0);
			} else
				FreeParityLogData(item);
		}
	}
	RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
	if (rf_parityLogDebug)
		printf("[exiting ParityLogAppend]\n");
	return (0);
}
/*
 * Set the loggingEnabled flag of the first rf_numParityRegions entries of
 * raidPtr->regionInfo, taking each region's mutex around the update.
 */
void
rf_EnableParityLogging(RF_Raid_t * raidPtr)
{
	int r = 0;

	while (r < rf_numParityRegions) {
		RF_LOCK_MUTEX(raidPtr->regionInfo[r].mutex);
		raidPtr->regionInfo[r].loggingEnabled = RF_TRUE;
		RF_UNLOCK_MUTEX(raidPtr->regionInfo[r].mutex);
		r++;
	}
	if (rf_parityLogDebug) {
		printf("[parity logging enabled]\n");
	}
}
#endif /* RF_INCLUDE_PARITYLOGGING > 0 */