From 32c7ac38ed88b4733e83958f05a5e493637158f8 Mon Sep 17 00:00:00 2001 From: Ian Lepore Date: Sun, 27 Jan 2013 20:28:14 +0000 Subject: [PATCH] Fix off-by-one errors in low-level arm9 and arm10 cache maintenance routines. In all the routines that loop through a range of virtual addresses, the loop is controlled by subtracting the cache line size from the total length of the request. After the subtract, a 'bpl' instruction was used, which branches if the result of the subtraction is zero or greater, but we need to exit the loop when the count hits zero. Thus, all the bpl instructions in those loops have been changed to 'bhi' (branch if unsigned higher, i.e. only when the subtraction did not borrow and the remaining count is still greater than zero). In addition, the two routines that walk through the cache using set-and-index were correct, but confusing. The loop control for those has been simplified, just so that it's easier to see by examination that the code is correct. Routines for other arm architectures and generations still have the bpl instruction, but compensate for the off-by-one situation by decrementing the count register by one before entering the loop. 
PR: arm/174461 Approved by: cognet (mentor) --- sys/arm/arm/cpufunc_asm_arm10.S | 26 +++++++++++--------------- sys/arm/arm/cpufunc_asm_arm9.S | 26 +++++++++++--------------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/sys/arm/arm/cpufunc_asm_arm10.S b/sys/arm/arm/cpufunc_asm_arm10.S index 22da6aadc950..2ef999c92784 100644 --- a/sys/arm/arm/cpufunc_asm_arm10.S +++ b/sys/arm/arm/cpufunc_asm_arm10.S @@ -87,7 +87,7 @@ ENTRY_NP(arm10_icache_sync_range) mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm10_sync_next + bhi .Larm10_sync_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr @@ -108,12 +108,10 @@ ENTRY_NP(arm10_icache_sync_all) orr ip, s_max, i_max .Lnext_index: mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ - sub ip, ip, i_inc - tst ip, i_max /* Index 0 is last one */ - bne .Lnext_index /* Next index */ - mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ + subs ip, ip, i_inc + bhs .Lnext_index /* Next index */ subs s_max, s_max, s_inc - bpl .Lnext_set /* Next set */ + bhs .Lnext_set /* Next set */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr @@ -133,7 +131,7 @@ ENTRY(arm10_dcache_wb_range) mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm10_wb_next + bhi .Larm10_wb_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr @@ -150,7 +148,7 @@ ENTRY(arm10_dcache_wbinv_range) mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm10_wbinv_next + bhi .Larm10_wbinv_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr @@ -171,7 +169,7 @@ ENTRY(arm10_dcache_inv_range) mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm10_inv_next + bhi .Larm10_inv_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr @@ -189,7 +187,7 @@ ENTRY(arm10_idcache_wbinv_range) 
mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm10_id_wbinv_next + bhi .Larm10_id_wbinv_next mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr @@ -211,12 +209,10 @@ ENTRY(arm10_dcache_wbinv_all) orr ip, s_max, i_max .Lnext_index_inv: mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ - sub ip, ip, i_inc - tst ip, i_max /* Index 0 is last one */ - bne .Lnext_index_inv /* Next index */ - mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ + subs ip, ip, i_inc + bhs .Lnext_index_inv /* Next index */ subs s_max, s_max, s_inc - bpl .Lnext_set_inv /* Next set */ + bhs .Lnext_set_inv /* Next set */ mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */ bx lr diff --git a/sys/arm/arm/cpufunc_asm_arm9.S b/sys/arm/arm/cpufunc_asm_arm9.S index 291d3f7bdbfc..ae9fe0009c98 100644 --- a/sys/arm/arm/cpufunc_asm_arm9.S +++ b/sys/arm/arm/cpufunc_asm_arm9.S @@ -81,7 +81,7 @@ ENTRY_NP(arm9_icache_sync_range) mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm9_sync_next + bhi .Larm9_sync_next mov pc, lr ENTRY_NP(arm9_icache_sync_all) @@ -101,12 +101,10 @@ ENTRY_NP(arm9_icache_sync_all) orr ip, s_max, i_max .Lnext_index: mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ - sub ip, ip, i_inc - tst ip, i_max /* Index 0 is last one */ - bne .Lnext_index /* Next index */ - mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */ + subs ip, ip, i_inc + bhs .Lnext_index /* Next index */ subs s_max, s_max, s_inc - bpl .Lnext_set /* Next set */ + bhs .Lnext_set /* Next set */ mov pc, lr .Larm9_line_size: @@ -125,7 +123,7 @@ ENTRY(arm9_dcache_wb_range) mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm9_wb_next + bhi .Larm9_wb_next mov pc, lr ENTRY(arm9_dcache_wbinv_range) @@ -141,7 +139,7 @@ ENTRY(arm9_dcache_wbinv_range) mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add 
r0, r0, ip subs r1, r1, ip - bpl .Larm9_wbinv_next + bhi .Larm9_wbinv_next mov pc, lr /* @@ -161,7 +159,7 @@ ENTRY(arm9_dcache_inv_range) mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm9_inv_next + bhi .Larm9_inv_next mov pc, lr ENTRY(arm9_idcache_wbinv_range) @@ -178,7 +176,7 @@ ENTRY(arm9_idcache_wbinv_range) mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */ add r0, r0, ip subs r1, r1, ip - bpl .Larm9_id_wbinv_next + bhi .Larm9_id_wbinv_next mov pc, lr ENTRY_NP(arm9_idcache_wbinv_all) @@ -199,12 +197,10 @@ ENTRY(arm9_dcache_wbinv_all) orr ip, s_max, i_max .Lnext_index_inv: mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ - sub ip, ip, i_inc - tst ip, i_max /* Index 0 is last one */ - bne .Lnext_index_inv /* Next index */ - mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */ + subs ip, ip, i_inc + bhs .Lnext_index_inv /* Next index */ subs s_max, s_max, s_inc - bpl .Lnext_set_inv /* Next set */ + bhs .Lnext_set_inv /* Next set */ mov pc, lr .Larm9_cache_data: