Check the page table before TLB1 in pmap_kextract()
The vast majority of pmap_kextract() calls are looking for a physical
memory address, not a device address. Checking the page table first
saves the formerly inevitable loop through TLB1 (64 iterations on
e500mc and derivatives) in the most common cases.

Benchmarking this on the P5020 (e5500 core) yields a 300% throughput
improvement on dtsec(4) (115 Mbit/s -> 460 Mbit/s), measured with iperf.
Benchmarking on the P1022 (e500v2 core, 16 TLB1 entries) yields a 50%
throughput improvement on tsec(4) (~93 Mbit/s -> 165 Mbit/s), also
measured with iperf.

MFC after:	1 week
Relnotes:	Maybe (significant performance improvement)
commit 1ccb14588b
parent e60ff6a3b4
@@ -2089,18 +2089,23 @@ static vm_paddr_t
 mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
 {
 	tlb_entry_t e;
+	vm_paddr_t p;
 	int i;
 
-	/* Check TLB1 mappings */
-	for (i = 0; i < TLB1_ENTRIES; i++) {
-		tlb1_read_entry(&e, i);
-		if (!(e.mas1 & MAS1_VALID))
-			continue;
-		if (va >= e.virt && va < e.virt + e.size)
-			return (e.phys + (va - e.virt));
+	p = pte_vatopa(mmu, kernel_pmap, va);
+
+	if (p == 0) {
+		/* Check TLB1 mappings */
+		for (i = 0; i < TLB1_ENTRIES; i++) {
+			tlb1_read_entry(&e, i);
+			if (!(e.mas1 & MAS1_VALID))
+				continue;
+			if (va >= e.virt && va < e.virt + e.size)
+				return (e.phys + (va - e.virt));
+		}
 	}
 
-	return (pte_vatopa(mmu, kernel_pmap, va));
+	return (p);
 }
 
 /*
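For reference, here is the function as it reads after this change,
reassembled from the hunk above. This is a reconstruction of the diff's
"after" state, not an independently verified copy of the source file;
the comment above the pte_vatopa() call is added here to summarize the
commit's rationale.

static vm_paddr_t
mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
{
	tlb_entry_t e;
	vm_paddr_t p;
	int i;

	/*
	 * Most callers ask about ordinary physical memory, so do the
	 * page-table walk first and fall back to the TLB1 scan (up to
	 * TLB1_ENTRIES iterations) only when that lookup misses.
	 */
	p = pte_vatopa(mmu, kernel_pmap, va);

	if (p == 0) {
		/* Check TLB1 mappings */
		for (i = 0; i < TLB1_ENTRIES; i++) {
			tlb1_read_entry(&e, i);
			if (!(e.mas1 & MAS1_VALID))
				continue;
			if (va >= e.virt && va < e.virt + e.size)
				return (e.phys + (va - e.virt));
		}
	}

	return (p);
}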