loader: bcache read ahead block count should take account the large sectors

The loader bcache is implementing simple read-ahead to boost the cache.
The bcache is built based on 512B block sizes, and the read ahead is attempting
to read number of cache blocks, based on amount of the free bcache space.

However, there are devices using larger sector sizes than 512B, most obviously
the CD media is based on 2k sectors. This means the read-ahead can not be just
random number of blocks, but we should use value suitable also for use with
larger sectors, as for example, with CD devices, we should read multiple of 2KB.
Since the sector size from disk interface is not too reliable, i guess we can
just use "good enough" value, so the implementation is rounding down the read
ahead block count to be multiple of 16.

This means we have covered sector sizes to 8k.

In addition, the update does implement the end of cache marker, to help to
detect the possible memory corruption - I have not seen it happening so far,
but it does not hurt to have the detection mechanism in place.

Reviewed by:	allanjude
Approved by:	allanjude (mentor)
Differential Revision:	https://reviews.freebsd.org/D9179
This commit is contained in:
Toomas Soome 2017-02-06 08:58:40 +00:00
parent fff006f7c0
commit 7a60826b65
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=313332
2 changed files with 51 additions and 9 deletions

View File

@ -64,7 +64,7 @@ struct bcachectl
struct bcache {
struct bcachectl *bcache_ctl;
caddr_t bcache_data;
u_int bcache_nblks;
size_t bcache_nblks;
size_t ra;
};
@ -86,6 +86,7 @@ static u_int bcache_rablks;
((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
#define BCACHE_READAHEAD 256
#define BCACHE_MINREADAHEAD 32
#define BCACHE_MARKER 0xdeadbeef
static void bcache_invalidate(struct bcache *bc, daddr_t blkno);
static void bcache_insert(struct bcache *bc, daddr_t blkno);
@ -95,7 +96,7 @@ static void bcache_free_instance(struct bcache *bc);
* Initialise the cache for (nblks) of (bsize).
*/
void
bcache_init(u_int nblks, size_t bsize)
bcache_init(size_t nblks, size_t bsize)
{
/* set up control data */
bcache_total_nblks = nblks;
@ -122,6 +123,7 @@ bcache_allocate(void)
u_int i;
struct bcache *bc = malloc(sizeof (struct bcache));
int disks = bcache_numdev;
uint32_t *marker;
if (disks == 0)
disks = 1; /* safe guard */
@ -140,11 +142,13 @@ bcache_allocate(void)
bc->bcache_nblks = bcache_total_nblks >> i;
bcache_unit_nblks = bc->bcache_nblks;
bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
sizeof(uint32_t));
if (bc->bcache_data == NULL) {
/* dont error out yet. fall back to 32 blocks and try again */
bc->bcache_nblks = 32;
bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
sizeof(uint32_t));
}
bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
@ -152,8 +156,11 @@ bcache_allocate(void)
if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
bcache_free_instance(bc);
errno = ENOMEM;
return(NULL);
return (NULL);
}
/* Insert cache end marker. */
marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
*marker = BCACHE_MARKER;
/* Flush the cache */
for (i = 0; i < bc->bcache_nblks; i++) {
@ -215,6 +222,9 @@ read_strategy(void *devdata, int rw, daddr_t blk, size_t size,
int result;
daddr_t p_blk;
caddr_t p_buf;
uint32_t *marker;
marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
if (bc == NULL) {
errno = ENODEV;
@ -261,9 +271,35 @@ read_strategy(void *devdata, int rw, daddr_t blk, size_t size,
p_size = MIN(r_size, nblk - i); /* read at least those blocks */
/*
* The read ahead size setup.
* While the read ahead can save us IO, it also can complicate things:
* 1. We do not want to read ahead by wrapping around the
* bcache end - this would complicate the cache management.
* 2. We are using bc->ra as dynamic hint for read ahead size,
* detected cache hits will increase the read-ahead block count, and
* misses will decrease, see the code above.
* 3. The bcache is sized by 512B blocks, however, the underlying device
* may have a larger sector size, and we should perform the IO by
* taking into account these larger sector sizes. We could solve this by
* passing the sector size to bcache_allocate(), or by using ioctl(), but
* in this version we are using the constant, 16 blocks, and are rounding
* read ahead block count down to multiple of 16.
* Using the constant has two reasons, we are not entirely sure if the
* BIOS disk interface is providing the correct value for sector size.
* And secondly, this way we get the most conservative setup for the ra.
*
* The selection of multiple of 16 blocks (8KB) is quite arbitrary, however,
* we want to have the CD (2K) and the 4K disks to be covered.
* Also, as we have 32 blocks to be allocated as the fallback value in the
* bcache_allocate(), the 16 ra blocks will allow the read ahead
* to be used even with bcache this small.
*/
ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
if (ra != bc->bcache_nblks) { /* do we have RA space? */
ra = MIN(bc->ra, ra);
if (ra != 0 && ra != bc->bcache_nblks) { /* do we have RA space? */
ra = MIN(bc->ra, ra - 1);
ra = rounddown(ra, 16); /* multiple of 16 blocks */
p_size += ra;
}
@ -310,6 +346,12 @@ read_strategy(void *devdata, int rw, daddr_t blk, size_t size,
result = 0;
}
if (*marker != BCACHE_MARKER) {
printf("BUG: bcache corruption detected: nblks: %zu p_blk: %lu, "
"p_size: %zu, ra: %zu\n", bc->bcache_nblks,
(long unsigned)BHASH(bc, p_blk), p_size, ra);
}
done:
if ((result == 0) && (rsize != NULL))
*rsize = size;
@ -338,7 +380,7 @@ bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size,
/* bypass large requests, or when the cache is inactive */
if (bc == NULL ||
((size * 2 / bcache_blksize) > bcache_nblks)) {
DEBUG("bypass %d from %d", size / bcache_blksize, blk);
DEBUG("bypass %zu from %qu", size / bcache_blksize, blk);
bcache_bypasses++;
return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
}

View File

@ -73,7 +73,7 @@ int kern_pread(int fd, vm_offset_t dest, size_t len, off_t off);
void *alloc_pread(int fd, off_t off, size_t len);
/* bcache.c */
void bcache_init(u_int nblks, size_t bsize);
void bcache_init(size_t nblks, size_t bsize);
void bcache_add_dev(int);
void *bcache_allocate(void);
void bcache_free(void *);