arm64: bzero optimization

This optimization attempts to utylize as wide as possible register store instructions to zero large buffers.
The implementation, if possible, will use 'dc zva' to zero buffer by cache lines.

Speedup: 60x faster memory zeroing

Submitted by:          Dominik Ermel <der@semihalf.com>
Obtained from:         Semihalf
Sponsored by:          Cavium
Reviewed by:           kib
Differential Revision: https://reviews.freebsd.org/D5726
This commit is contained in:
Wojciech Macek 2016-04-04 07:06:20 +00:00
parent 841ecd471a
commit db27818234
3 changed files with 21 additions and 11 deletions

View File

@ -108,6 +108,7 @@ struct kva_md_info kmi;
int64_t dcache_line_size; /* The minimum D cache line size */
int64_t icache_line_size; /* The minimum I cache line size */
int64_t idcache_line_size; /* The minimum cache line size */
int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */
static void
cpu_startup(void *dummy)
@ -129,16 +130,6 @@ cpu_idle_wakeup(int cpu)
return (0);
}
void
bzero(void *buf, size_t len)
{
uint8_t *p;
p = buf;
while(len-- > 0)
*p++ = 0;
}
int
fill_regs(struct thread *td, struct reg *regs)
{
@ -800,8 +791,9 @@ try_load_dtb(caddr_t kmdp)
static void
cache_setup(void)
{
int dcache_line_shift, icache_line_shift;
int dcache_line_shift, icache_line_shift, dczva_line_shift;
uint32_t ctr_el0;
uint32_t dczid_el0;
ctr_el0 = READ_SPECIALREG(ctr_el0);
@ -815,6 +807,17 @@ cache_setup(void)
icache_line_size = sizeof(int) << icache_line_shift;
idcache_line_size = MIN(dcache_line_size, icache_line_size);
dczid_el0 = READ_SPECIALREG(dczid_el0);
/* Check if dc zva is not prohibited */
if (dczid_el0 & DCZID_DZP)
dczva_line_size = 0;
else {
/* Same as with above calculations */
dczva_line_shift = DCZID_BS_SIZE(dczid_el0);
dczva_line_size = sizeof(int) << dczva_line_shift;
}
}
void

View File

@ -66,6 +66,12 @@
#define CTR_ILINE_MASK (0xf << CTR_ILINE_SHIFT)
#define CTR_ILINE_SIZE(reg) (((reg) & CTR_ILINE_MASK) >> CTR_ILINE_SHIFT)
/* DCZID_EL0 - Data Cache Zero ID register */
#define DCZID_DZP (1 << 4) /* DC ZVA prohibited if non-0 */
#define DCZID_BS_SHIFT 0
#define DCZID_BS_MASK (0xf << DCZID_BS_SHIFT)
#define DCZID_BS_SIZE(reg) (((reg) & DCZID_BS_MASK) >> DCZID_BS_SHIFT)
/* ESR_ELx */
#define ESR_ELx_ISS_MASK 0x00ffffff
#define ISS_INSN_FnV (0x01 << 10)

View File

@ -12,6 +12,7 @@ arm64/arm64/bus_machdep.c standard
arm64/arm64/bus_space_asm.S standard
arm64/arm64/busdma_bounce.c standard
arm64/arm64/busdma_machdep.c standard
arm64/arm64/bzero.S standard
arm64/arm64/clock.c standard
arm64/arm64/copyinout.S standard
arm64/arm64/copystr.c standard