From 73ffb5e8a4db0b4cb48d5cfa1f3b641b252c3757 Mon Sep 17 00:00:00 2001
From: Wojciech Macek
Date: Mon, 4 Apr 2016 07:11:33 +0000
Subject: [PATCH] Add bzero.S to ARM64 machdep

Add file missing from https://svnweb.freebsd.org/changeset/base/297536
---
 sys/arm64/arm64/bzero.S | 206 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 sys/arm64/arm64/bzero.S

diff --git a/sys/arm64/arm64/bzero.S b/sys/arm64/arm64/bzero.S
new file mode 100644
index 000000000000..60ac97e87b23
--- /dev/null
+++ b/sys/arm64/arm64/bzero.S
@@ -0,0 +1,206 @@
+/*-
+ * Copyright (C) 2016 Cavium Inc.
+ * All rights reserved.
+ *
+ * Developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include "assym.s"
+
+	/*
+	 * void bzero(void *p, size_t size)
+	 *
+	 *  x0 - p
+	 *  x1 - size
+	 */
+ENTRY(bzero)
+	cbz	x1, ending
+
+	/*
+	 * x5 is the number of cache lines to zero - calculated later and
+	 * will become non-zero if the buffer is long enough to zero by
+	 * cache lines (and if it is allowed).
+	 * We need to zero it before proceeding with buffers of size
+	 * smaller than 16 bytes - otherwise x5 will not be
+	 * calculated and will retain a random value.
+	 * "normal" is used for buffers <= 16 bytes and to align the buffer
+	 * to a cache line for buffers bigger than a cache line; a non-0 x5
+	 * after "normal" has completed indicates that it has been used
+	 * to align the buffer to a cache line, zeroing by cache lines will
+	 * now be performed, and x5 is the number of cache lines to loop
+	 * through.
+	 */
+	mov	x5, xzr
+
+	/* No use of cache-assisted zeroing for buffers with size <= 16 */
+	cmp	x1, #0x10
+	b.le	normal
+
+	/*
+	 * Load the size of the line that will be cleaned by the dc zva call.
+	 * 0 means that the instruction is not allowed.
+	 */
+	ldr	x7, =dczva_line_size
+	ldr	x7, [x7]
+	cbz	x7, normal
+
+	/*
+	 * The buffer must be larger than a cache line to use cache zeroing
+	 * (and cache line aligned, but this is checked after the jump).
+	 */
+	cmp	x1, x7
+	b.lt	normal
+
+	/*
+	 * Calculate the number of bytes to the cache-aligned address (x4) and
+	 * the number of full cache lines (x5). x6 is the final address to zero.
+	 */
+	sub	x2, x7, #0x01
+	mov	x3, -1
+	eor	x3, x3, x2
+	add	x4, x0, x2
+	and	x4, x4, x3
+	subs	x4, x4, x0
+	b.eq	normal
+
+	/* Calculate the number of "lines" in the buffer */
+	sub	x5, x1, x4
+	rbit	x2, x7
+	clz	x2, x2
+	lsr	x5, x5, x2
+
+	/*
+	 * If the number of cache lines is 0, we will not be able to zero
+	 * by cache lines, so go the normal way.
+	 */
+	cbz	x5, normal
+	/* x6 is the final address to zero */
+	add	x6, x0, x1
+
+	/*
+	 * We are here because x5 is non-0, so normal will be used to
+	 * align the buffer before cache zeroing. x4 holds the number of
+	 * bytes needed for alignment.
+	 */
+	mov	x1, x4
+
+	/* When jumping here: x0 holds the pointer, x1 holds the size */
+normal:
+	/*
+	 * Get the buffer offset into a 16 byte aligned address; 0 means the
+	 * pointer is aligned.
+	 */
+	ands	x2, x0, #0x0f
+	b.eq	aligned_to_16
+	/* Calculate one-byte loop runs to an 8 byte aligned address. */
+	ands	x2, x2, #0x07
+	mov	x3, #0x08
+	sub	x2, x3, x2
+	/* x2 is the number of bytes missing for alignment, x1 is the buffer size */
+	cmp	x1, x2
+	csel	x2, x1, x2, le
+	sub	x1, x1, x2
+
+	/*
+	 * The byte-by-byte loop stores at least enough bytes to align the
+	 * pointer and at most "size" bytes.
+	 */
align:
+	strb	wzr, [x0], #0x01
+	subs	x2, x2, #0x01
+	b.ne	align
+
+	/* Now the pointer is aligned to 8 bytes */
+	cmp	x1, #0x10
+	b.lt	lead_out
+	/*
+	 * Check if a store of another 8 bytes is needed to align to a
+	 * 16 byte address, and do it.
+	 */
+	tbz	x0, #0x03, aligned_to_16
+	str	xzr, [x0], #0x08
+	sub	x1, x1, #0x08
+
+	/* When jumping here: x0 is a 16 byte aligned address, x1 is the size */
aligned_to_16:
+	/* If the size is less than 16 bytes, use lead_out to zero what remains */
+	cmp	x1, #0x10
+	b.lt	lead_out
+
+	lsr	x2, x1, #0x04
zero_by_16:
+	stp	xzr, xzr, [x0], #0x10
+	subs	x2, x2, #0x01
+	b.ne	zero_by_16
+
+	/*
+	 * The lead out requires addresses to be aligned to 8 bytes. It is
+	 * used to zero buffers with sizes < 16 and whatever cannot be zeroed
+	 * by the zero_by_16 loop.
+	 */
+	ands	x1, x1, #0x0f
+	b.eq	lead_out_end
lead_out:
+	tbz	x1, #0x03, lead_out_dword
+	str	xzr, [x0], #0x08
lead_out_dword:
+	tbz	x1, #0x02, lead_out_word
+	str	wzr, [x0], #0x04
lead_out_word:
+	tbz	x1, #0x01, lead_out_byte
+	strh	wzr, [x0], #0x02
lead_out_byte:
+	tbz	x1, #0x00, lead_out_end
+	strb	wzr, [x0], #0x01
+
lead_out_end:
+	/*
+	 * If x5 is non-zero, this means that normal has been used as
+	 * a lead-in to align the buffer address to the cache line size.
+	 */
+	cbz	x5, ending
+
+	/*
+	 * Here x5 holds the number of lines to zero; x6 is the final address
+	 * of the buffer. x0 is a cache line aligned pointer. x7 is the cache
+	 * line size in bytes.
+	 */
cache_line_zero:
+	dc	zva, x0
+	add	x0, x0, x7
+	subs	x5, x5, #0x01
+	b.ne	cache_line_zero
+
+	/* Need to zero remaining bytes? */
+	subs	x1, x6, x0
+	b.ne	normal
+
ending:
+	ret
+
+END(bzero)
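
For review purposes, here is a minimal C sketch (not part of the patch) of the
pointer arithmetic the routine performs before taking the dc zva path: round
the buffer start up to the next cache-line boundary, count the full lines that
fit, and leave the head and tail for the ordinary store path. The function and
parameter names (split_for_line_zero, head, lines, tail) are illustrative
only; line_size corresponds to dczva_line_size and must be a power of two.

/*
 * Illustrative-only sketch of the head/lines/tail split computed before
 * the "dc zva" loop. head maps to x4, lines to x5; the shift by
 * ctz(line_size) mirrors the rbit+clz+lsr sequence in the assembly,
 * since count-trailing-zeros of a power of two is its log2.
 */
#include <stddef.h>
#include <stdint.h>

static void
split_for_line_zero(uintptr_t p, size_t size, size_t line_size,
    size_t *head, size_t *lines, size_t *tail)
{
	/* Round p up to the next line_size boundary (add/and in the asm). */
	uintptr_t aligned = (p + line_size - 1) & ~(uintptr_t)(line_size - 1);

	*head = (size_t)(aligned - p);		/* bytes zeroed by "normal" (x4) */
	*lines = (size - *head) >> __builtin_ctzl(line_size);	/* full lines (x5) */
	*tail = size - *head - (*lines * line_size);	/* handed back to "normal" */
}

The alignment matters because dc zva zeroes the entire cache line containing
the given address, so any head or tail bytes outside full lines must be
written with ordinary stores to avoid touching memory outside the buffer.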
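Similarly, the lead_out path tests individual bits of the remaining size (at
most 15 bytes here) and issues one store per set bit, from 8 bytes down to 1.
A hedged C equivalent, again illustrative only (lead_out_tail and rem are
names invented for this sketch):

/*
 * Illustrative-only C equivalent of the lead_out tail: each set bit in
 * rem selects one store, mirroring the tbz tests on bits 3..0. memcpy
 * is used to sidestep alignment concerns that the assembly handles by
 * construction (the pointer is already 8 byte aligned at this point).
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
lead_out_tail(unsigned char *p, size_t rem)
{
	uint64_t z64 = 0;
	uint32_t z32 = 0;
	uint16_t z16 = 0;

	if (rem & 8) { memcpy(p, &z64, 8); p += 8; }	/* str  xzr */
	if (rem & 4) { memcpy(p, &z32, 4); p += 4; }	/* str  wzr */
	if (rem & 2) { memcpy(p, &z16, 2); p += 2; }	/* strh wzr */
	if (rem & 1) { *p = 0; }			/* strb wzr */
}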