amd64: clean up copyin/copyout

- move the PSL.AC comment to the fault handler
- stop testing for zero-sized ops. after several minutes of package
  building there were no copyin calls with zero bytes and very few such
  copyout calls. the semantics of returning 0 in this case are preserved:
  with a zero count the rep movs* instructions copy nothing and %eax is
  already cleared, so the routine still falls through to a 0 return
- shorten exit paths by clearing %eax earlier
- replace xchg with 3 movs, which is what compilers emit (see the sketch
  after this list). a naive benchmark on EPYC suggests about a 1% increase
  in throughput thanks to this change.
- remove the useless movb %cl,%al from copyin. it looks like a leftover
  from many years ago
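
A minimal sketch of the xchg-to-movs change, for illustration only (the
scratch register %r8 matches the diff below; surrounding code is elided).
On most current x86 microarchitectures a register-to-register mov is cheap
and frequently eliminated at register rename, while xchg between two
registers is a microcoded, multi-uop instruction, so doing the swap through
an otherwise free caller-saved register tends to be faster:

	/* before: swap the rep movs source/destination in one instruction */
	xchgq	%rdi,%rsi

	/* after: the same swap through a scratch register */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi

The same three-mov sequence is used in both the COPYOUT and COPYIN macros
in the diff below.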

Reviewed by:	kib
Approved by:	re (gjb)
Differential Revision:	https://reviews.freebsd.org/D17286
commit 0e59ecce47
parent 9872237d21
Author:	Mateusz Guzik
Date:	2018-09-27 15:24:16 +00:00


@@ -404,10 +404,7 @@ END(fillw)
 .macro COPYOUT smap erms
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%r9
-	/* Trap entry clears PSL.AC */
 	movq	$copy_fault,PCB_ONFAULT(%r9)
-	testq	%rdx,%rdx			/* anything to do? */
-	jz	2f
 	/*
 	 * Check explicitly for non-user addresses. If 486 write protection
@@ -432,10 +429,20 @@ END(fillw)
 	cmpq	%rcx,%rax
 	ja	copy_fault
-	xchgq	%rdi,%rsi
-	/* bcopy(%rsi, %rdi, %rdx) */
+	/*
+	 * Set up arguments for rep movs*.
+	 */
+	movq	%rdi,%r8
+	movq	%rsi,%rdi
+	movq	%r8,%rsi
 	movq	%rdx,%rcx
+	/*
+	 * Set return value to zero. Remaining failure mode goes through
+	 * copy_fault.
+	 */
+	xorl	%eax,%eax
 	SMAP_DISABLE \smap
 .if \erms == 0
 	cmpq	$15,%rcx
@@ -447,17 +454,16 @@ END(fillw)
 	andb	$7,%cl
 	jne	1f
 	SMAP_ENABLE \smap
-	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r9)
 	POP_FRAME_POINTER
 	ret
 .endif
 	ALIGN_TEXT
 1:
 	rep
 	movsb
 	SMAP_ENABLE \smap
-2:
-	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r9)
 	POP_FRAME_POINTER
 	ret
@@ -487,8 +493,6 @@ END(copyout_smap_erms)
 	PUSH_FRAME_POINTER
 	movq	PCPU(CURPCB),%r9
 	movq	$copy_fault,PCB_ONFAULT(%r9)
-	testq	%rdx,%rdx			/* anything to do? */
-	jz	2f
 	/*
 	 * make sure address is valid
@@ -500,9 +504,12 @@ END(copyout_smap_erms)
 	cmpq	%rcx,%rax
 	ja	copy_fault
-	xchgq	%rdi,%rsi
+	movq	%rdi,%r8
+	movq	%rsi,%rdi
+	movq	%r8,%rsi
 	movq	%rdx,%rcx
-	movb	%cl,%al
+	xorl	%eax,%eax
 	SMAP_DISABLE \smap
 .if \erms == 0
@@ -511,22 +518,20 @@ END(copyout_smap_erms)
 	shrq	$3,%rcx				/* copy longword-wise */
 	rep
 	movsq
-	movb	%al,%cl
+	movb	%dl,%cl
 	andb	$7,%cl				/* copy remaining bytes */
 	jne	1f
 	SMAP_ENABLE \smap
-	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r9)
 	POP_FRAME_POINTER
 	ret
 .endif
 	ALIGN_TEXT
 1:
 	rep
 	movsb
 	SMAP_ENABLE \smap
-2:
-	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r9)
 	POP_FRAME_POINTER
 	ret
@@ -549,9 +554,9 @@ ENTRY(copyin_smap_erms)
 END(copyin_smap_erms)
 	ALIGN_TEXT
+	/* Trap entry clears PSL.AC */
 copy_fault:
-	movq	PCPU(CURPCB),%rdx
-	movq	$0,PCB_ONFAULT(%rdx)
+	movq	$0,PCB_ONFAULT(%r9)
 	movl	$EFAULT,%eax
 	POP_FRAME_POINTER
 	ret