indent(1): rewrite the integer/floating constant scanning part of lexi.c
Remove the procedural code that did the scanning, which was faulty and didn't support complex constants such as 0x1p-61. Replace it with a finite state machine expressed as a transition table. The table was rewritten by hand from lx's output, given parts of the grammar expressed as regular expressions. lx is Katherine Flavel's lexer generator, currently available at https://github.com/katef/libfsm; the parts of the grammar were taken from http://quut.com/c/ANSI-C-grammar-l-2011.html and extended to support binary integer constants, which are a popular GCC extension.

Reported by: bde
This commit is contained in:
parent
f327aaf32d
commit
04c77c18ad
@ -120,6 +120,7 @@ main(int argc, char **argv)
|
|||||||
if (tokenbuf == NULL)
|
if (tokenbuf == NULL)
|
||||||
err(1, NULL);
|
err(1, NULL);
|
||||||
alloc_typenames();
|
alloc_typenames();
|
||||||
|
init_constant_tt();
|
||||||
l_com = combuf + bufsize - 5;
|
l_com = combuf + bufsize - 5;
|
||||||
l_lab = labbuf + bufsize - 5;
|
l_lab = labbuf + bufsize - 5;
|
||||||
l_code = codebuf + bufsize - 5;
|
l_code = codebuf + bufsize - 5;
|
||||||
|
@ -36,6 +36,7 @@ int compute_code_target(void);
|
|||||||
int compute_label_target(void);
|
int compute_label_target(void);
|
||||||
int count_spaces(int, char *);
|
int count_spaces(int, char *);
|
||||||
int count_spaces_until(int, char *, char *);
|
int count_spaces_until(int, char *, char *);
|
||||||
|
void init_constant_tt(void);
|
||||||
int lexi(struct parser_state *);
|
int lexi(struct parser_state *);
|
||||||
void diag2(int, const char *);
|
void diag2(int, const char *);
|
||||||
void diag3(int, const char *, int);
|
void diag3(int, const char *, int);
|
||||||
|
@ -54,15 +54,12 @@ __FBSDID("$FreeBSD$");
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <sys/param.h>
|
||||||
|
|
||||||
#include "indent_globs.h"
|
#include "indent_globs.h"
|
||||||
#include "indent_codes.h"
|
#include "indent_codes.h"
|
||||||
#include "indent.h"
|
#include "indent.h"
|
||||||
|
|
||||||
#define alphanum 1
|
|
||||||
#ifdef undef
|
|
||||||
#define opchar 3
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct templ {
|
struct templ {
|
||||||
const char *rwd;
|
const char *rwd;
|
||||||
int rwcode;
|
int rwcode;
|
||||||
@ -122,26 +119,48 @@ const char **typenames;
|
|||||||
int typename_count;
|
int typename_count;
|
||||||
int typename_top = -1;
|
int typename_top = -1;
|
||||||
|
|
||||||
char chartype[128] =
|
/*
|
||||||
{ /* this is used to facilitate the decision of
|
* The transition table below was rewritten by hand from lx's output, given
|
||||||
* what type (alphanumeric, operator) each
|
* the following definitions. lx is Katherine Flavel's lexer generator.
|
||||||
* character is */
|
*
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
* O = /[0-7]/; D = /[0-9]/; NZ = /[1-9]/;
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
* H = /[a-f0-9]/i; B = /[0-1]/; HP = /0x/i;
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
* BP = /0b/i; E = /e[+\-]?/i D+; P = /p[+\-]?/i D+;
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
* FS = /[fl]/i; IS = /u/i /(l|L|ll|LL)/? | /(l|L|ll|LL)/ /u/i?;
|
||||||
0, 3, 0, 0, 1, 3, 3, 0,
|
*
|
||||||
0, 0, 3, 3, 0, 3, 0, 3,
|
* D+ E FS? -> $float;
|
||||||
1, 1, 1, 1, 1, 1, 1, 1,
|
* D* "." D+ E? FS? -> $float;
|
||||||
1, 1, 0, 0, 3, 3, 3, 3,
|
* D+ "." E? FS? -> $float; HP H+ IS? -> $int;
|
||||||
0, 1, 1, 1, 1, 1, 1, 1,
|
* HP H+ P FS? -> $float; NZ D* IS? -> $int;
|
||||||
1, 1, 1, 1, 1, 1, 1, 1,
|
* HP H* "." H+ P FS? -> $float; "0" O* IS? -> $int;
|
||||||
1, 1, 1, 1, 1, 1, 1, 1,
|
* HP H+ "." P FS -> $float; BP B+ IS? -> $int;
|
||||||
1, 1, 1, 0, 0, 0, 3, 1,
|
*/
|
||||||
0, 1, 1, 1, 1, 1, 1, 1,
|
static char const *table[] = {
|
||||||
1, 1, 1, 1, 1, 1, 1, 1,
|
/* examples:
|
||||||
1, 1, 1, 1, 1, 1, 1, 1,
|
00
|
||||||
1, 1, 1, 0, 3, 0, 3, 0
|
s 0xx
|
||||||
|
t 00xaa
|
||||||
|
a 11 101100xxa..
|
||||||
|
r 11ee0001101lbuuxx.a.pp
|
||||||
|
t.01.e+008bLuxll0Ll.aa.p+0
|
||||||
|
states: ABCDEFGHIJKLMNOPQRSTUVWXYZ */
|
||||||
|
['0'] = "CEIDEHHHIJQ U Q VUVVZZZ",
|
||||||
|
['1'] = "DEIDEHHHIJQ U Q VUVVZZZ",
|
||||||
|
['7'] = "DEIDEHHHIJ U VUVVZZZ",
|
||||||
|
['9'] = "DEJDEHHHJJ U VUVVZZZ",
|
||||||
|
['a'] = " U VUVV ",
|
||||||
|
['b'] = " K U VUVV ",
|
||||||
|
['e'] = " FFF FF U VUVV ",
|
||||||
|
['f'] = " f f U VUVV f",
|
||||||
|
['u'] = " MM M i iiM M ",
|
||||||
|
['x'] = " N ",
|
||||||
|
['p'] = " FFX ",
|
||||||
|
['L'] = " LLf fL PR Li L f",
|
||||||
|
['l'] = " OOf fO S P O i O f",
|
||||||
|
['+'] = " G Y ",
|
||||||
|
['.'] = "B EE EE T W ",
|
||||||
|
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
|
||||||
|
[0] = "uuiifuufiuuiiuiiiiiuiuuuuu",
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -173,7 +192,7 @@ lexi(struct parser_state *state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Scan an alphanumeric token */
|
/* Scan an alphanumeric token */
|
||||||
if (chartype[*buf_ptr & 127] == alphanum ||
|
if (isalnum((unsigned char)*buf_ptr) ||
|
||||||
(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
|
(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
|
||||||
/*
|
/*
|
||||||
* we have a character or number
|
* we have a character or number
|
||||||
@ -182,73 +201,28 @@ lexi(struct parser_state *state)
|
|||||||
|
|
||||||
if (isdigit((unsigned char)*buf_ptr) ||
|
if (isdigit((unsigned char)*buf_ptr) ||
|
||||||
(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
|
(buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
|
||||||
int seendot = 0,
|
char s;
|
||||||
seenexp = 0,
|
unsigned char i;
|
||||||
seensfx = 0;
|
|
||||||
|
|
||||||
/*
|
for (s = 'A'; s != 'f' && s != 'i' && s != 'u'; ) {
|
||||||
* base 2, base 8, base 16:
|
i = (unsigned char)*buf_ptr;
|
||||||
*/
|
if (i >= nitems(table) || table[i] == NULL ||
|
||||||
if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
|
table[i][s - 'A'] == ' ') {
|
||||||
int len;
|
s = table[0][s - 'A'];
|
||||||
|
break;
|
||||||
if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
|
|
||||||
len = strspn(buf_ptr + 2, "01") + 2;
|
|
||||||
else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
|
|
||||||
len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
|
|
||||||
else
|
|
||||||
len = strspn(buf_ptr + 1, "012345678") + 1;
|
|
||||||
if (len > 0) {
|
|
||||||
CHECK_SIZE_TOKEN(len);
|
|
||||||
memcpy(e_token, buf_ptr, len);
|
|
||||||
e_token += len;
|
|
||||||
buf_ptr += len;
|
|
||||||
}
|
}
|
||||||
else
|
s = table[i][s - 'A'];
|
||||||
diag2(1, "Unterminated literal");
|
CHECK_SIZE_TOKEN(1);
|
||||||
}
|
*e_token++ = *buf_ptr++;
|
||||||
else /* base 10: */
|
if (buf_ptr >= buf_end)
|
||||||
while (1) {
|
fill_buffer();
|
||||||
if (*buf_ptr == '.') {
|
|
||||||
if (seendot)
|
|
||||||
break;
|
|
||||||
else
|
|
||||||
seendot++;
|
|
||||||
}
|
|
||||||
CHECK_SIZE_TOKEN(3);
|
|
||||||
*e_token++ = *buf_ptr++;
|
|
||||||
if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
|
|
||||||
if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
|
|
||||||
break;
|
|
||||||
else {
|
|
||||||
seenexp++;
|
|
||||||
seendot++;
|
|
||||||
*e_token++ = *buf_ptr++;
|
|
||||||
if (*buf_ptr == '+' || *buf_ptr == '-')
|
|
||||||
*e_token++ = *buf_ptr++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
CHECK_SIZE_TOKEN(2);
|
|
||||||
if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
|
|
||||||
*e_token++ = *buf_ptr++;
|
|
||||||
seensfx |= 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
|
|
||||||
if (buf_ptr[1] == buf_ptr[0])
|
|
||||||
*e_token++ = *buf_ptr++;
|
|
||||||
*e_token++ = *buf_ptr++;
|
|
||||||
seensfx |= 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
/* s now indicates the type: f(loating), i(nteger), u(nknown) */
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
|
while (isalnum((unsigned char)*buf_ptr) ||
|
||||||
|
*buf_ptr == BACKSLASH ||
|
||||||
|
*buf_ptr == '_') {
|
||||||
/* fill_buffer() terminates buffer with newline */
|
/* fill_buffer() terminates buffer with newline */
|
||||||
if (*buf_ptr == BACKSLASH) {
|
if (*buf_ptr == BACKSLASH) {
|
||||||
if (*(buf_ptr + 1) == '\n') {
|
if (*(buf_ptr + 1) == '\n') {
|
||||||
@ -527,21 +501,11 @@ lexi(struct parser_state *state)
|
|||||||
case '=':
|
case '=':
|
||||||
if (state->in_or_st)
|
if (state->in_or_st)
|
||||||
state->block_init = 1;
|
state->block_init = 1;
|
||||||
#ifdef undef
|
|
||||||
if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */
|
|
||||||
e_token[-1] = *buf_ptr++;
|
|
||||||
if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
|
|
||||||
*e_token++ = *buf_ptr++;
|
|
||||||
*e_token++ = '='; /* Flip =+ to += */
|
|
||||||
*e_token = 0;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (*buf_ptr == '=') {/* == */
|
if (*buf_ptr == '=') {/* == */
|
||||||
*e_token++ = '='; /* Flip =+ to += */
|
*e_token++ = '='; /* Flip =+ to += */
|
||||||
buf_ptr++;
|
buf_ptr++;
|
||||||
*e_token = 0;
|
*e_token = 0;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
code = binary_op;
|
code = binary_op;
|
||||||
unary_delim = true;
|
unary_delim = true;
|
||||||
break;
|
break;
|
||||||
@ -625,6 +589,22 @@ lexi(struct parser_state *state)
|
|||||||
return (code);
|
return (code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Initialize constant transition table: alias rows for characters that share another character's transitions */
|
||||||
|
void
|
||||||
|
init_constant_tt(void)
|
||||||
|
{
|
||||||
|
table['-'] = table['+'];
|
||||||
|
table['8'] = table['9'];
|
||||||
|
table['2'] = table['3'] = table['4'] = table['5'] = table['6'] = table['7'];
|
||||||
|
table['A'] = table['C'] = table['D'] = table['c'] = table['d'] = table['a'];
|
||||||
|
table['B'] = table['b'];
|
||||||
|
table['E'] = table['e'];
|
||||||
|
table['U'] = table['u'];
|
||||||
|
table['X'] = table['x'];
|
||||||
|
table['P'] = table['p'];
|
||||||
|
table['F'] = table['f'];
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
alloc_typenames(void)
|
alloc_typenames(void)
|
||||||
{
|
{
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
/* $FreeBSD$ */
|
/* $FreeBSD$ */
|
||||||
/* See r303499 */
|
|
||||||
void t(void) {
|
void t(void) {
|
||||||
unsigned long x = 314UL;
|
unsigned long x = 314UL;
|
||||||
float y = 3.14f;
|
double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
|
||||||
|
int z = 0b0101;
|
||||||
|
DO_NOTHING;
|
||||||
}
|
}
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
/* $FreeBSD$ */
|
/* $FreeBSD$ */
|
||||||
/* See r303499 */
|
|
||||||
void
|
void
|
||||||
t(void)
|
t(void)
|
||||||
{
|
{
|
||||||
unsigned long x = 314UL;
|
unsigned long x = 314UL;
|
||||||
float y = 3.14f;
|
double y[] = {0x1P+9F, 0.3, .1, 1.2f, 0xa.p01f, 3.14f, 2.L};
|
||||||
|
int z = 0b0101;
|
||||||
|
DO_NOTHING;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user