From 01c66110e17ad77765b90f07f6c6e357a1ba14b9 Mon Sep 17 00:00:00 2001 From: Piotr Pawel Stefaniak Date: Mon, 16 Jul 2018 05:46:50 +0000 Subject: [PATCH] indent(1): rewrite the integer/floating constant scanning part of lexi.c Remove procedural code that did the scanning, which was faulty and didn't support complex constants such as 0x1p-61. Replace it with a finite state machine expressed as a transition table. The table was rewritten by hand from lx's output, given parts of grammar expressed as regular expressions. lx is Katherine Flavel's lexer generator, currently available at https://github.com/katef/libfsm and the parts of grammar were taken from http://quut.com/c/ANSI-C-grammar-l-2011.html and extended to support binary integer constants which are a popular GCC extension. Reported by: bde --- usr.bin/indent/indent.c | 1 + usr.bin/indent/indent.h | 1 + usr.bin/indent/lexi.c | 176 ++++++++++++---------------- usr.bin/indent/tests/float.0 | 5 +- usr.bin/indent/tests/float.0.stdout | 5 +- 5 files changed, 86 insertions(+), 102 deletions(-) diff --git a/usr.bin/indent/indent.c b/usr.bin/indent/indent.c index 03e557ee807b..9b45daf63b0c 100644 --- a/usr.bin/indent/indent.c +++ b/usr.bin/indent/indent.c @@ -120,6 +120,7 @@ main(int argc, char **argv) if (tokenbuf == NULL) err(1, NULL); alloc_typenames(); + init_constant_tt(); l_com = combuf + bufsize - 5; l_lab = labbuf + bufsize - 5; l_code = codebuf + bufsize - 5; diff --git a/usr.bin/indent/indent.h b/usr.bin/indent/indent.h index 19ccf91f2905..2de26b1aac2e 100644 --- a/usr.bin/indent/indent.h +++ b/usr.bin/indent/indent.h @@ -36,6 +36,7 @@ int compute_code_target(void); int compute_label_target(void); int count_spaces(int, char *); int count_spaces_until(int, char *, char *); +void init_constant_tt(void); int lexi(struct parser_state *); void diag2(int, const char *); void diag3(int, const char *, int); diff --git a/usr.bin/indent/lexi.c b/usr.bin/indent/lexi.c index 15ec8c36d7b8..7d71ecd5cc19 100644 --- 
a/usr.bin/indent/lexi.c +++ b/usr.bin/indent/lexi.c @@ -54,15 +54,12 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include + #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" -#define alphanum 1 -#ifdef undef -#define opchar 3 -#endif - struct templ { const char *rwd; int rwcode; @@ -122,26 +119,48 @@ const char **typenames; int typename_count; int typename_top = -1; -char chartype[128] = -{ /* this is used to facilitate the decision of - * what type (alphanumeric, operator) each - * character is */ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3, 0, 0, 1, 3, 3, 0, - 0, 0, 3, 3, 0, 3, 0, 3, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 0, 0, 3, 3, 3, 3, - 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 3, 1, - 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 3, 0, 3, 0 +/* + * The transition table below was rewritten by hand from lx's output, given + * the following definitions. lx is Katherine Flavel's lexer generator. + * + * O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/; + * H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i; + * BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+; + * FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?; + * + * D+ E FS? -> $float; + * D* "." D+ E? FS? -> $float; + * D+ "." E? FS? -> $float; HP H+ IS? -> $int; + * HP H+ P FS? -> $float; NZ D* IS? -> $int; + * HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int; + * HP H+ "." P FS -> $float; BP B+ IS? -> $int; + */ +static char const *table[] = { + /* examples: + 00 + s 0xx + t 00xaa + a 11 101100xxa.. 
+ r 11ee0001101lbuuxx.a.pp + t.01.e+008bLuxll0Ll.aa.p+0 + states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */ + ['0'] = "CEIDEHHHIJQ U Q VUVVZZZ", + ['1'] = "DEIDEHHHIJQ U Q VUVVZZZ", + ['7'] = "DEIDEHHHIJ U VUVVZZZ", + ['9'] = "DEJDEHHHJJ U VUVVZZZ", + ['a'] = " U VUVV ", + ['b'] = " K U VUVV ", + ['e'] = " FFF FF U VUVV ", + ['f'] = " f f U VUVV f", + ['u'] = " MM M i iiM M ", + ['x'] = " N ", + ['p'] = " FFX ", + ['L'] = " LLf fL PR Li L f", + ['l'] = " OOf fO S P O i O f", + ['+'] = " G Y ", + ['.'] = "B EE EE T W ", + /* ABCDEFGHIJKLMNOPQRSTUVWXYZ */ + [0] = "uuiifuufiuuiiuiiiiiuiuuuuu", }; static int @@ -173,7 +192,7 @@ lexi(struct parser_state *state) } /* Scan an alphanumeric token */ - if (chartype[*buf_ptr & 127] == alphanum || + if (isalnum((unsigned char)*buf_ptr) || (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { /* * we have a character or number @@ -182,73 +201,28 @@ lexi(struct parser_state *state) if (isdigit((unsigned char)*buf_ptr) || (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { - int seendot = 0, - seenexp = 0, - seensfx = 0; + char s; + unsigned char i; - /* - * base 2, base 8, base 16: - */ - if (buf_ptr[0] == '0' && buf_ptr[1] != '.') { - int len; - - if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B') - len = strspn(buf_ptr + 2, "01") + 2; - else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X') - len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2; - else - len = strspn(buf_ptr + 1, "012345678") + 1; - if (len > 0) { - CHECK_SIZE_TOKEN(len); - memcpy(e_token, buf_ptr, len); - e_token += len; - buf_ptr += len; + for (s = 'A'; s != 'f' && s != 'i' && s != 'u'; ) { + i = (unsigned char)*buf_ptr; + if (i >= nitems(table) || table[i] == NULL || + table[i][s - 'A'] == ' ') { + s = table[0][s - 'A']; + break; } - else - diag2(1, "Unterminated literal"); - } - else /* base 10: */ - while (1) { - if (*buf_ptr == '.') { - if (seendot) - break; - else - seendot++; - } - CHECK_SIZE_TOKEN(3); - *e_token++ = *buf_ptr++; - if (!isdigit((unsigned 
char)*buf_ptr) && *buf_ptr != '.') { - if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) - break; - else { - seenexp++; - seendot++; - *e_token++ = *buf_ptr++; - if (*buf_ptr == '+' || *buf_ptr == '-') - *e_token++ = *buf_ptr++; - } - } - } - - while (1) { - CHECK_SIZE_TOKEN(2); - if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { - *e_token++ = *buf_ptr++; - seensfx |= 1; - continue; - } - if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { - if (buf_ptr[1] == buf_ptr[0]) - *e_token++ = *buf_ptr++; - *e_token++ = *buf_ptr++; - seensfx |= 2; - continue; - } - break; + s = table[i][s - 'A']; + CHECK_SIZE_TOKEN(1); + *e_token++ = *buf_ptr++; + if (buf_ptr >= buf_end) + fill_buffer(); } + /* s now indicates the type: f(loating), i(nteger), u(nknown) */ } else - while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) { + while (isalnum((unsigned char)*buf_ptr) || + *buf_ptr == BACKSLASH || + *buf_ptr == '_') { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { @@ -527,21 +501,11 @@ lexi(struct parser_state *state) case '=': if (state->in_or_st) state->block_init = 1; -#ifdef undef - if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */ - e_token[-1] = *buf_ptr++; - if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) - *e_token++ = *buf_ptr++; - *e_token++ = '='; /* Flip =+ to += */ - *e_token = 0; - } -#else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } -#endif code = binary_op; unary_delim = true; break; @@ -625,6 +589,22 @@ lexi(struct parser_state *state) return (code); } +/* Initialize constant transition table */ +void +init_constant_tt(void) +{ + table['-'] = table['+']; + table['8'] = table['9']; + table['2'] = table['3'] = table['4'] = table['5'] = table['6'] = table['7']; + table['A'] = table['C'] = table['D'] = table['c'] = table['d'] = table['a']; + table['B'] = 
table['b']; + table['E'] = table['e']; + table['U'] = table['u']; + table['X'] = table['x']; + table['P'] = table['p']; + table['F'] = table['f']; +} + void alloc_typenames(void) { diff --git a/usr.bin/indent/tests/float.0 b/usr.bin/indent/tests/float.0 index 91f017fc1ce8..ec441ec8870f 100644 --- a/usr.bin/indent/tests/float.0 +++ b/usr.bin/indent/tests/float.0 @@ -1,6 +1,7 @@ /* $FreeBSD$ */ -/* See r303499 */ void t(void) { unsigned long x = 314UL; - float y = 3.14f; + double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; + int z = 0b0101; + DO_NOTHING; } diff --git a/usr.bin/indent/tests/float.0.stdout b/usr.bin/indent/tests/float.0.stdout index 0f213182ff9b..679bbfecfb18 100644 --- a/usr.bin/indent/tests/float.0.stdout +++ b/usr.bin/indent/tests/float.0.stdout @@ -1,8 +1,9 @@ /* $FreeBSD$ */ -/* See r303499 */ void t(void) { unsigned long x = 314UL; - float y = 3.14f; + double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L}; + int z = 0b0101; + DO_NOTHING; }