Check the page table before TLB1 in pmap_kextract()
The vast majority of pmap_kextract() calls are looking for a physical memory address, not a device address. Checking the page table first avoids the formerly unavoidable loop through TLB1 (64 iterations on e500mc and derivatives) in the most common cases.

Benchmarking this on the P5020 (e5500 core) yields a 300% throughput improvement on dtsec(4) (115 Mbit/s -> 460 Mbit/s), measured with iperf. Benchmarking on the P1022 (e500v2 core, 16 TLB1 entries) yields a 50% throughput improvement on tsec(4) (~93 Mbit/s -> 165 Mbit/s), also measured with iperf.

MFC after:	1 week
Relnotes:	Maybe (significant performance improvement)
commit 26e5200797
parent 89ecfb4cb7
@@ -2089,18 +2089,23 @@ static vm_paddr_t
 mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
 {
 	tlb_entry_t e;
 	vm_paddr_t p;
 	int i;
 
-	/* Check TLB1 mappings */
-	for (i = 0; i < TLB1_ENTRIES; i++) {
-		tlb1_read_entry(&e, i);
-		if (!(e.mas1 & MAS1_VALID))
-			continue;
-		if (va >= e.virt && va < e.virt + e.size)
-			return (e.phys + (va - e.virt));
+	p = pte_vatopa(mmu, kernel_pmap, va);
+
+	if (p == 0) {
+		/* Check TLB1 mappings */
+		for (i = 0; i < TLB1_ENTRIES; i++) {
+			tlb1_read_entry(&e, i);
+			if (!(e.mas1 & MAS1_VALID))
+				continue;
+			if (va >= e.virt && va < e.virt + e.size)
+				return (e.phys + (va - e.virt));
+		}
 	}
 
-	return (pte_vatopa(mmu, kernel_pmap, va));
+	return (p);
 }
 
 /*
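For readers who want to poke at the control flow outside the kernel, below is a minimal userland sketch of the same ordering: resolve through the page table first, and fall back to a linear TLB1 scan only on a miss. The mock_* names, the NUM_TLB1 constant, and the fake address ranges are hypothetical stand-ins for the kernel's pte_vatopa(), tlb1_read_entry(), and TLB1_ENTRIES; only the lookup order mirrors the diff above.

/*
 * Standalone sketch, not kernel code. mock_pte_vatopa(), mock_tlb1[] and
 * NUM_TLB1 are invented stand-ins for pte_vatopa(), TLB1 and TLB1_ENTRIES.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_TLB1 64	/* 64 on e500mc and derivatives, 16 on e500v2 */

struct mock_tlb_entry {
	int	valid;
	uint64_t virt;
	uint64_t size;
	uint64_t phys;
};

static struct mock_tlb_entry mock_tlb1[NUM_TLB1];

/* Stand-in for pte_vatopa(): a page-table walk; returns 0 on a miss. */
static uint64_t
mock_pte_vatopa(uint64_t va)
{
	/* Pretend low kernel VAs map 1:1 with a fixed offset. */
	return (va < 0x08000000 ? va + 0x00100000 : 0);
}

/* Same ordering as the patched mmu_booke_kextract(). */
static uint64_t
mock_kextract(uint64_t va)
{
	uint64_t p;
	int i;

	/* Common case: physical memory, answered by the page table. */
	p = mock_pte_vatopa(va);

	if (p == 0) {
		/* Rare case (device mappings): scan TLB1 linearly. */
		for (i = 0; i < NUM_TLB1; i++) {
			if (!mock_tlb1[i].valid)
				continue;
			if (va >= mock_tlb1[i].virt &&
			    va < mock_tlb1[i].virt + mock_tlb1[i].size)
				return (mock_tlb1[i].phys +
				    (va - mock_tlb1[i].virt));
		}
	}

	return (p);
}

int
main(void)
{
	/* One valid TLB1 entry standing in for a device window. */
	mock_tlb1[NUM_TLB1 - 1] = (struct mock_tlb_entry){
		.valid = 1, .virt = 0xe0000000, .size = 0x00100000,
		.phys = 0xff000000
	};

	/* Hits the page table; the TLB1 loop never runs. */
	printf("mem va 0x1000 -> pa 0x%jx\n",
	    (uintmax_t)mock_kextract(0x1000));
	/* Misses the page table; falls back to the TLB1 scan. */
	printf("dev va 0xe0001234 -> pa 0x%jx\n",
	    (uintmax_t)mock_kextract(0xe0001234));
	return (0);
}

Built with any C compiler (cc sketch.c && ./a.out), the first lookup never touches the loop, which is the point of the reordering: the linear scan now runs only for the small minority of addresses, such as device windows, that the page table cannot resolve.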