Loops with 'loop forever' annotation inside strcmp are actually meant to
loop forever. Falling through the end of the first loop may result in
equal strings being compared unequal, e.g.:
#include <string.h>
int main(void)
{
char a[4096] __attribute__((aligned(4)));
char b[4096] __attribute__((aligned(4)));
memset(a, ' ', 258 * 8);
memset(b, ' ', 258 * 8);
a[255 * 8] = 0;
a[256 * 8] = 'a';
b[255 * 8] = 0;
b[256 * 8] = 'b';
return !(strcmp(a, b) == 0);
}
Falling through the end of the second loop may result in unequal strings
being compared as equal, e.g.:
#include <string.h>
int main(void)
{
char a[4096] __attribute__((aligned(4)));
char b[4096] __attribute__((aligned(4)));
memset(a, ' ', 514 * 6);
memset(b, ' ', 514 * 6);
a[514 * 6 + 0] = 'a';
a[514 * 6 + 1] = 0;
b[514 * 6 + 0] = 'b';
b[514 * 6 + 1] = 0;
return !(strcmp(a, b) != 0);
}
Use 0 as a loop counter to make 2^32 - 1 iterations which is enough to
cover all addressable memory. While at it drop useless nop at the end of
the first loop and use a11 for all loop counters.
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
---
Changes v1->v2:
- wrong test case for the second loop, fixed.
libc/string/xtensa/strcmp.S | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/libc/string/xtensa/strcmp.S b/libc/string/xtensa/strcmp.S
index 2dce590dbd42..a16da5da2e92 100644
--- a/libc/string/xtensa/strcmp.S
+++ b/libc/string/xtensa/strcmp.S
@@ -92,8 +92,8 @@ ENTRY (strcmp)
/* (2 mod 4) alignment for loop instruction */
.Lunaligned:
#if XCHAL_HAVE_LOOPS
- _movi.n a8, 0 /* set up for the maximum loop count */
- loop a8, .Lretdiff /* loop forever (almost anyway) */
+ movi a11, 0 /* set up for the maximum loop count */
+ loop a11, .Lretdiff /* loop forever (almost anyway) */
#endif
.Lnextbyte:
l8ui a8, a2, 0
@@ -131,11 +131,10 @@ ENTRY (strcmp)
#if XCHAL_HAVE_LOOPS
.Laligned:
.begin no-transform
+ movi a11, 0
l32r a4, .Lmask0 /* mask for byte 0 */
l32r a7, .Lmask4
- /* Loop forever. (a4 is more than than the maximum number
- of iterations) */
- loop a4, .Laligned_done
+ loop a11, .Laligned_done /* Loop forever. */
/* First unrolled loop body. */
l32i a8, a2, 0 /* get word from s1 */
@@ -156,8 +155,6 @@ ENTRY (strcmp)
addi a2, a2, 8 /* advance s1 pointer */
addi a3, a3, 8 /* advance s2 pointer */
.Laligned_done:
- or a1, a1, a1 /* nop */
-
.Lprobeq2:
/* Adjust pointers to account for the loop unrolling. */
addi a2, a2, 4
@@ -198,7 +195,7 @@ ENTRY (strcmp)
#if XCHAL_HAVE_LOOPS
/* align (1 mod 4) */
- loop a4, .Leq /* loop forever (a4 is bigger than max iters) */
+ loop a11, .Leq /* loop forever */
.end no-transform
l32i a8, a2, 0 /* get word from s1 */
--
2.1.4