json/parse: rewrite and simplify number parsing
Convert the number parsing function into a linear sequence with a goto label for each state, rather than a single loop with a state variable. This makes the code easier to read and also improves speed (better branch prediction and smaller inner loops for the common case). On my test system, jsoncat citylots.json > /dev/null improves from ~1.7s to ~1.2s. This changes the behavior of some number parsing test cases: an input prefix matching the number grammar as defined by JSON will be returned as a number even if it is followed by trailing garbage, consistent with the rest of the parser. For example, the input 01 will be parsed as the valid number 0 followed by a trailing 1. This only makes a difference when the full input is a single number value; if the value is nested in an object or array, the trailing garbage will not match the expected syntax and the whole parse will fail with SPDK_JSON_PARSE_INVALID (e.g. [00 will parse the first 0 as a number and then fail on the second 0, since only a comma or right square bracket would be accepted). Change-Id: Ifabfaed611219b3e0a06c8677190a28b87e8a13b Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
parent
a509ddeb24
commit
38c09e5eed
@ -252,125 +252,100 @@ static int
|
||||
json_valid_number(uint8_t *start, uint8_t *buf_end)
|
||||
{
|
||||
uint8_t *p = start;
|
||||
enum {
|
||||
NUM_STATE_START,
|
||||
NUM_STATE_INT_FIRST_DIGIT,
|
||||
NUM_STATE_INT_DIGITS,
|
||||
NUM_STATE_FRAC_OR_EXP,
|
||||
NUM_STATE_FRAC_FIRST_DIGIT,
|
||||
NUM_STATE_FRAC_DIGITS,
|
||||
NUM_STATE_EXP_SIGN,
|
||||
NUM_STATE_EXP_FIRST_DIGIT,
|
||||
NUM_STATE_EXP_DIGITS,
|
||||
} state = NUM_STATE_START;
|
||||
uint8_t c;
|
||||
|
||||
if (p >= buf_end) return -1;
|
||||
|
||||
while (p != buf_end) {
|
||||
uint8_t c = *p++;
|
||||
c = *p++;
|
||||
if (c >= '1' && c <= '9') goto num_int_digits;
|
||||
if (c == '0') goto num_frac_or_exp;
|
||||
if (c == '-') goto num_int_first_digit;
|
||||
p--;
|
||||
goto done_invalid;
|
||||
|
||||
switch (c) {
|
||||
case '0':
|
||||
if (state == NUM_STATE_START || state == NUM_STATE_INT_FIRST_DIGIT) {
|
||||
/*
|
||||
* If the very first digit is 0,
|
||||
* it must be the last digit of the integer part
|
||||
* (no leading zeroes allowed).
|
||||
*/
|
||||
state = NUM_STATE_FRAC_OR_EXP;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
switch (state) {
|
||||
case NUM_STATE_START:
|
||||
case NUM_STATE_INT_FIRST_DIGIT:
|
||||
state = NUM_STATE_INT_DIGITS;
|
||||
break;
|
||||
|
||||
case NUM_STATE_FRAC_FIRST_DIGIT:
|
||||
state = NUM_STATE_FRAC_DIGITS;
|
||||
break;
|
||||
|
||||
case NUM_STATE_EXP_SIGN:
|
||||
case NUM_STATE_EXP_FIRST_DIGIT:
|
||||
state = NUM_STATE_EXP_DIGITS;
|
||||
break;
|
||||
|
||||
case NUM_STATE_INT_DIGITS:
|
||||
case NUM_STATE_FRAC_DIGITS:
|
||||
case NUM_STATE_EXP_DIGITS:
|
||||
/* stay in same state */
|
||||
break;
|
||||
|
||||
default:
|
||||
return SPDK_JSON_PARSE_INVALID;
|
||||
}
|
||||
break;
|
||||
|
||||
case '.':
|
||||
if (state != NUM_STATE_INT_DIGITS && state != NUM_STATE_FRAC_OR_EXP) {
|
||||
return SPDK_JSON_PARSE_INVALID;
|
||||
}
|
||||
state = NUM_STATE_FRAC_FIRST_DIGIT;
|
||||
break;
|
||||
|
||||
case 'e':
|
||||
case 'E':
|
||||
switch (state) {
|
||||
case NUM_STATE_INT_DIGITS:
|
||||
case NUM_STATE_FRAC_OR_EXP:
|
||||
case NUM_STATE_FRAC_DIGITS:
|
||||
state = NUM_STATE_EXP_SIGN;
|
||||
break;
|
||||
default:
|
||||
return SPDK_JSON_PARSE_INVALID;
|
||||
}
|
||||
break;
|
||||
|
||||
case '-':
|
||||
if (state == NUM_STATE_START) {
|
||||
state = NUM_STATE_INT_FIRST_DIGIT;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case '+':
|
||||
if (state == NUM_STATE_EXP_SIGN) {
|
||||
state = NUM_STATE_EXP_FIRST_DIGIT;
|
||||
} else {
|
||||
return SPDK_JSON_PARSE_INVALID;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* Got an unexpected character - back up and stop parsing number.
|
||||
* The top-level parsing code will handle invalid trailing characters.
|
||||
*/
|
||||
p--;
|
||||
goto done;
|
||||
}
|
||||
num_int_first_digit:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c == '0') goto num_frac_or_exp;
|
||||
if (c >= '1' && c <= '9') goto num_int_digits;
|
||||
p--;
|
||||
}
|
||||
goto done_invalid;
|
||||
|
||||
done:
|
||||
switch (state) {
|
||||
case NUM_STATE_INT_DIGITS:
|
||||
case NUM_STATE_FRAC_OR_EXP:
|
||||
case NUM_STATE_FRAC_DIGITS:
|
||||
case NUM_STATE_EXP_DIGITS:
|
||||
/* Valid end state */
|
||||
return p - start;
|
||||
num_int_digits:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c >= '0' && c <= '9') goto num_int_digits;
|
||||
if (c == '.') goto num_frac_first_digit;
|
||||
if (c == 'e' || c == 'E') goto num_exp_sign;
|
||||
p--;
|
||||
}
|
||||
goto done_valid;
|
||||
|
||||
default:
|
||||
num_frac_or_exp:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c == '.') goto num_frac_first_digit;
|
||||
if (c == 'e' || c == 'E') goto num_exp_sign;
|
||||
p--;
|
||||
}
|
||||
goto done_valid;
|
||||
|
||||
num_frac_first_digit:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c >= '0' && c <= '9') goto num_frac_digits;
|
||||
p--;
|
||||
}
|
||||
goto done_invalid;
|
||||
|
||||
num_frac_digits:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c >= '0' && c <= '9') goto num_frac_digits;
|
||||
if (c == 'e' || c == 'E') goto num_exp_sign;
|
||||
p--;
|
||||
}
|
||||
goto done_valid;
|
||||
|
||||
num_exp_sign:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c >= '0' && c <= '9') goto num_exp_digits;
|
||||
if (c == '-' || c == '+') goto num_exp_first_digit;
|
||||
p--;
|
||||
}
|
||||
goto done_invalid;
|
||||
|
||||
num_exp_first_digit:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c >= '0' && c <= '9') goto num_exp_digits;
|
||||
p--;
|
||||
}
|
||||
goto done_invalid;
|
||||
|
||||
num_exp_digits:
|
||||
if (spdk_likely(p != buf_end)) {
|
||||
c = *p++;
|
||||
if (c >= '0' && c <= '9') goto num_exp_digits;
|
||||
p--;
|
||||
}
|
||||
goto done_valid;
|
||||
|
||||
done_valid:
|
||||
/* Valid end state */
|
||||
return p - start;
|
||||
|
||||
done_invalid:
|
||||
/* Invalid end state */
|
||||
if (p == buf_end) {
|
||||
/* Hit the end of the buffer - the stream is incomplete. */
|
||||
return SPDK_JSON_PARSE_INCOMPLETE;
|
||||
}
|
||||
|
||||
/* Found an invalid character in an invalid end state */
|
||||
return SPDK_JSON_PARSE_INVALID;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -444,18 +444,33 @@ test_parse_number(void)
|
||||
PARSE_PASS("0true", 1, "true");
|
||||
VAL_NUMBER("0");
|
||||
|
||||
NUM_FAIL("00", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("007", SPDK_JSON_PARSE_INVALID);
|
||||
PARSE_PASS("00", 1, "0");
|
||||
VAL_NUMBER("0");
|
||||
PARSE_FAIL("[00", SPDK_JSON_PARSE_INVALID);
|
||||
|
||||
PARSE_PASS("007", 1, "07");
|
||||
VAL_NUMBER("0");
|
||||
PARSE_FAIL("[007]", SPDK_JSON_PARSE_INVALID);
|
||||
|
||||
PARSE_PASS("345.678.1", 1, ".1");
|
||||
VAL_NUMBER("345.678");
|
||||
PARSE_FAIL("[345.678.1]", SPDK_JSON_PARSE_INVALID);
|
||||
|
||||
PARSE_PASS("3.2e-4+5", 1, "+5");
|
||||
VAL_NUMBER("3.2e-4");
|
||||
PARSE_FAIL("[3.2e-4+5]", SPDK_JSON_PARSE_INVALID);
|
||||
|
||||
PARSE_PASS("3.4.5", 1, ".5");
|
||||
VAL_NUMBER("3.4");
|
||||
PARSE_FAIL("[3.4.5]", SPDK_JSON_PARSE_INVALID);
|
||||
|
||||
NUM_FAIL("345.", SPDK_JSON_PARSE_INCOMPLETE);
|
||||
NUM_FAIL("345.678.1", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("+1", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("--1", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("3.", SPDK_JSON_PARSE_INCOMPLETE);
|
||||
NUM_FAIL("3.+4", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("3.2e+-4", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("3.2e-+4", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("3.2e-4+5", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("3.4.5", SPDK_JSON_PARSE_INVALID);
|
||||
NUM_FAIL("3e+", SPDK_JSON_PARSE_INCOMPLETE);
|
||||
NUM_FAIL("3e-", SPDK_JSON_PARSE_INCOMPLETE);
|
||||
NUM_FAIL("3.e4", SPDK_JSON_PARSE_INVALID);
|
||||
|
Loading…
x
Reference in New Issue
Block a user