diff --git a/bin/sh/expand.c b/bin/sh/expand.c index 75dcf5cad7d4..197be7e8e2d9 100644 --- a/bin/sh/expand.c +++ b/bin/sh/expand.c @@ -1445,57 +1445,63 @@ int patmatch(const char *pattern, const char *string, int squoted) { const char *p, *q, *end; + const char *bt_p, *bt_q; char c; wchar_t wc, wc2; p = pattern; q = string; + bt_p = NULL; + bt_q = NULL; for (;;) { switch (c = *p++) { case '\0': - goto breakloop; + if (*q != '\0') + goto backtrack; + return 1; case CTLESC: if (squoted && *q == CTLESC) q++; if (*q++ != *p++) - return 0; + goto backtrack; break; case CTLQUOTEMARK: continue; case '?': if (squoted && *q == CTLESC) q++; - if (localeisutf8) - wc = get_wc(&q); - else - wc = (unsigned char)*q++; - if (wc == '\0') + if (*q == '\0') return 0; + if (localeisutf8) { + wc = get_wc(&q); + /* + * A '?' does not match invalid UTF-8 but a + * '*' does, so backtrack. + */ + if (wc == 0) + goto backtrack; + } else + wc = (unsigned char)*q++; break; case '*': c = *p; while (c == CTLQUOTEMARK || c == '*') c = *++p; - if (c != CTLESC && c != CTLQUOTEMARK && - c != '?' && c != '*' && c != '[') { - while (*q != c) { - if (squoted && *q == CTLESC && - q[1] == c) - break; - if (*q == '\0') - return 0; - if (squoted && *q == CTLESC) - q++; - q++; - } - } - do { - if (patmatch(p, q, squoted)) - return 1; - if (squoted && *q == CTLESC) - q++; - } while (*q++ != '\0'); - return 0; + /* + * If the pattern ends here, we know the string + * matches without needing to look at the rest of it. + */ + if (c == '\0') + return 1; + /* + * First try the shortest match for the '*' that + * could work. We can forget any earlier '*' since + * there is no way having it match more characters + * can help us, given that we are already here. + */ + bt_p = p; + bt_q = q; + break; case '[': { const char *endp; int invert, found; @@ -1507,7 +1513,7 @@ patmatch(const char *pattern, const char *string, int squoted) for (;;) { while (*endp == CTLQUOTEMARK) endp++; - if (*endp == '\0') + if (*endp == 0) goto dft; /* no matching ] */ if (*endp == CTLESC) endp++; @@ -1522,12 +1528,14 @@ patmatch(const char *pattern, const char *string, int squoted) found = 0; if (squoted && *q == CTLESC) q++; - if (localeisutf8) - chr = get_wc(&q); - else - chr = (unsigned char)*q++; - if (chr == '\0') + if (*q == '\0') return 0; + if (localeisutf8) { + chr = get_wc(&q); + if (chr == 0) + goto backtrack; + } else + chr = (unsigned char)*q++; c = *p++; do { if (c == CTLQUOTEMARK) @@ -1568,21 +1576,34 @@ patmatch(const char *pattern, const char *string, int squoted) } } while ((c = *p++) != ']'); if (found == invert) - return 0; + goto backtrack; break; } dft: default: if (squoted && *q == CTLESC) q++; - if (*q++ != c) + if (*q == '\0') return 0; + if (*q++ == c) + break; +backtrack: + /* + * If we have a mismatch (other than hitting the end + * of the string), go back to the last '*' seen and + * have it match one additional character. + */ + if (bt_p == NULL) + return 0; + if (squoted && *bt_q == CTLESC) + bt_q++; + if (*bt_q == '\0') + return 0; + bt_q++; + p = bt_p; + q = bt_q; break; } } -breakloop: - if (*q != '\0') - return 0; - return 1; }