indent(1): improve CHECK_SIZE_ macros

Rewrite the macros so that they take a parameter. Consumers use it to signal how much room in the buffer they need; this lets them perform the check once, when the required space is known, instead of once per loop iteration. Also take the parameter value into consideration when resizing the buffer: the requested space may be larger than the constant 400 bytes by which the previous version grew the buffer, so the buffer now grows by the sum of those two values. On the consumer side, don't copy strings byte by byte - use memcpy(). Deduplicate the code that copied base 2, base 8 and base 16 literals. Don't advance the e_token pointer past the terminating null character once the token has been copied into s_token; this makes the token's length easy to calculate as e_token - s_token.
svn path=/head/; revision=334576
2018-06-03 18:19:41 +00:00 · 2018-06-03 18:19:41 +00:00 · 9de29bfb5a · 2020-12-20 02:59:44 +00:00
commit 9de29bfb5a
parent 1479f36d8e
4 changed files with 110 additions and 95 deletions
--- a/usr.bin/indent/indent.c
+++ b/usr.bin/indent/indent.c
@ -520,11 +520,12 @@ main(int argc, char **argv)
 				 * '}' */
 	    if (s_com != e_com) {	/* the turkey has embedded a comment
 					 * in a line. fix it */
+		int len = e_com - s_com;
+
+		CHECK_SIZE_CODE(len + 3);
 		*e_code++ = ' ';
-		for (t_ptr = s_com; *t_ptr; ++t_ptr) {
-		    CHECK_SIZE_CODE;
-		    *e_code++ = *t_ptr;
-		}
+		memcpy(e_code, s_com, len);
+		e_code += len;
 		*e_code++ = ' ';
 		*e_code = '\0';	/* null terminate code sect */
 		ps.want_blank = false;
@ -540,7 +541,10 @@ main(int argc, char **argv)
 	/*-----------------------------------------------------*\
 	|	   do switch on type of token scanned		|
 	\*-----------------------------------------------------*/
-	CHECK_SIZE_CODE;
+	CHECK_SIZE_CODE(3);	/* maximum number of increments of e_code
+				 * before the next CHECK_SIZE_CODE or
+				 * dump_line() is 2. After that there's the
+				 * final increment for the null character. */
 	switch (type_code) {	/* now, decide what to do with the token */

 	case form_feed:	/* found a form feed in line */
@ -651,19 +655,25 @@ main(int argc, char **argv)
 	    else if (ps.want_blank)
 		*e_code++ = ' ';

-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len);
+		memcpy(e_code, token, len);
+		e_code += len;
 	    }
 	    ps.want_blank = false;
 	    break;

 	case binary_op:	/* any binary operation */
-	    if (ps.want_blank)
-		*e_code++ = ' ';
-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;	/* move the operator */
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len + 1);
+		if (ps.want_blank)
+		    *e_code++ = ' ';
+		memcpy(e_code, token, len);
+		e_code += len;
 	    }
 	    ps.want_blank = true;
 	    break;
@ -704,13 +714,20 @@ main(int argc, char **argv)
 	    }
 	    ps.in_stmt = false;	/* seeing a label does not imply we are in a
 				 * stmt */
-	    for (t_ptr = s_code; *t_ptr; ++t_ptr)
-		*e_lab++ = *t_ptr;	/* turn everything so far into a label */
-	    e_code = s_code;
-	    *e_lab++ = ':';
-	    *e_lab++ = ' ';
-	    *e_lab = '\0';
+	    /*
+	     * turn everything so far into a label
+	     */
+	    {
+		int len = e_code - s_code;

+		CHECK_SIZE_LAB(len + 3);
+		memcpy(e_lab, s_code, len);
+		e_lab += len;
+		*e_lab++ = ':';
+		*e_lab++ = ' ';
+		*e_lab = '\0';
+		e_code = s_code;
+	    }
 	    force_nl = ps.pcase = scase;	/* ps.pcase will be used by
 						 * dump_line to decide how to
 						 * indent the label. force_nl
@ -986,22 +1003,28 @@ main(int argc, char **argv)
 		parse(hd_type);
 	    }
    copy_id:
-	    if (ps.want_blank)
-		*e_code++ = ' ';
-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len + 1);
+		if (ps.want_blank)
+		    *e_code++ = ' ';
+		memcpy(e_code, s_token, len);
+		e_code += len;
 	    }
 	    if (type_code != funcname)
 		ps.want_blank = true;
 	    break;

 	case strpfx:
-	    if (ps.want_blank)
-		*e_code++ = ' ';
-	    for (t_ptr = token; *t_ptr; ++t_ptr) {
-		CHECK_SIZE_CODE;
-		*e_code++ = *t_ptr;
+	    {
+		int len = e_token - s_token;
+
+		CHECK_SIZE_CODE(len + 1);
+		if (ps.want_blank)
+		    *e_code++ = ' ';
+		memcpy(e_code, token, len);
+		e_code += len;
 	    }
 	    ps.want_blank = false;
 	    break;
@ -1038,6 +1061,7 @@ main(int argc, char **argv)
 		    (s_lab != e_lab) ||
 		    (s_code != e_code))
 		dump_line();
+	    CHECK_SIZE_LAB(1);
 	    *e_lab++ = '#';	/* move whole line to 'label' buffer */
 	    {
 		int         in_comment = 0;
@ -1051,7 +1075,7 @@ main(int argc, char **argv)
 			fill_buffer();
 		}
 		while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
-		    CHECK_SIZE_LAB;
+		    CHECK_SIZE_LAB(2);
 		    *e_lab = *buf_ptr++;
 		    if (buf_ptr >= buf_end)
 			fill_buffer();
@ -1119,6 +1143,7 @@ main(int argc, char **argv)
 		    buf_end = sc_end;
 		    sc_end = NULL;
 		}
+		CHECK_SIZE_LAB(1);
 		*e_lab = '\0';	/* null terminate line */
 		ps.pcase = false;
 	    }
@ -1249,14 +1274,14 @@ indent_declaration(int cur_dec_ind, int tabs_to_var)
    if (tabs_to_var) {
 	int tpos;

+	CHECK_SIZE_CODE(cur_dec_ind / tabsize);
 	while ((tpos = tabsize * (1 + pos / tabsize)) <= cur_dec_ind) {
-	    CHECK_SIZE_CODE;
 	    *e_code++ = '\t';
 	    pos = tpos;
 	}
    }
+    CHECK_SIZE_CODE(cur_dec_ind - pos + 1);
    while (pos < cur_dec_ind) {
-	CHECK_SIZE_CODE;
 	*e_code++ = ' ';
 	pos++;
    }
--- a/usr.bin/indent/indent_globs.h
+++ b/usr.bin/indent/indent_globs.h
@ -52,9 +52,9 @@
 FILE       *input;		/* the fid for the input file */
 FILE       *output;		/* the output file */

-#define CHECK_SIZE_CODE \
-	if (e_code >= l_code) { \
-	    int nsize = l_code-s_code+400; \
+#define CHECK_SIZE_CODE(desired_size) \
+	if (e_code + (desired_size) >= l_code) { \
+	    int nsize = l_code-s_code + 400 + desired_size; \
 	    int code_len = e_code-s_code; \
 	    codebuf = (char *) realloc(codebuf, nsize); \
 	    if (codebuf == NULL) \
@ -63,9 +63,9 @@ FILE       *output;		/* the output file */
 	    l_code = codebuf + nsize - 5; \
 	    s_code = codebuf + 1; \
 	}
-#define CHECK_SIZE_COM \
-	if (e_com >= l_com) { \
-	    int nsize = l_com-s_com+400; \
+#define CHECK_SIZE_COM(desired_size) \
+	if (e_com + (desired_size) >= l_com) { \
+	    int nsize = l_com-s_com + 400 + desired_size; \
 	    int com_len = e_com - s_com; \
 	    int blank_pos; \
 	    if (last_bl != NULL) \
@ -81,9 +81,9 @@ FILE       *output;		/* the output file */
 	    l_com = combuf + nsize - 5; \
 	    s_com = combuf + 1; \
 	}
-#define CHECK_SIZE_LAB \
-	if (e_lab >= l_lab) { \
-	    int nsize = l_lab-s_lab+400; \
+#define CHECK_SIZE_LAB(desired_size) \
+	if (e_lab + (desired_size) >= l_lab) { \
+	    int nsize = l_lab-s_lab + 400 + desired_size; \
 	    int label_len = e_lab - s_lab; \
 	    labbuf = (char *) realloc(labbuf, nsize); \
 	    if (labbuf == NULL) \
@ -92,9 +92,9 @@ FILE       *output;		/* the output file */
 	    l_lab = labbuf + nsize - 5; \
 	    s_lab = labbuf + 1; \
 	}
-#define CHECK_SIZE_TOKEN \
-	if (e_token >= l_token) { \
-	    int nsize = l_token-s_token+400; \
+#define CHECK_SIZE_TOKEN(desired_size) \
+	if (e_token + (desired_size) >= l_token) { \
+	    int nsize = l_token-s_token + 400 + desired_size; \
 	    int token_len = e_token - s_token; \
 	    tokenbuf = (char *) realloc(tokenbuf, nsize); \
 	    if (tokenbuf == NULL) \
--- a/usr.bin/indent/lexi.c
+++ b/usr.bin/indent/lexi.c
@ -182,47 +182,32 @@ lexi(struct parser_state *state)

 	if (isdigit((unsigned char)*buf_ptr) ||
 	    (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
-	    enum base {
-		BASE_2, BASE_8, BASE_10, BASE_16
-	    };
 	    int         seendot = 0,
 	                seenexp = 0,
 			seensfx = 0;
-	    enum base	in_base = BASE_10;

-	    if (*buf_ptr == '0') {
+	    /*
+	     * base 2, base 8, base 16:
+	     */
+	    if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
+		int len;
+
 		if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
-		    in_base = BASE_2;
+		    len = strspn(buf_ptr + 2, "01") + 2;
 		else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
-		    in_base = BASE_16;
-		else if (isdigit((unsigned char)buf_ptr[1]))
-		    in_base = BASE_8;
+		    len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
+		else
+		    len = strspn(buf_ptr + 1, "012345678") + 1;
+		if (len > 0) {
+		    CHECK_SIZE_TOKEN(len);
+		    memcpy(e_token, buf_ptr, len);
+		    e_token += len;
+		    buf_ptr += len;
+		}
+		else
+		    diag2(1, "Unterminated literal");
 	    }
-	    switch (in_base) {
-	    case BASE_2:
-		*e_token++ = *buf_ptr++;
-		*e_token++ = *buf_ptr++;
-		while (*buf_ptr == '0' || *buf_ptr == '1') {
-		    CHECK_SIZE_TOKEN;
-		    *e_token++ = *buf_ptr++;
-		}
-		break;
-	    case BASE_8:
-		*e_token++ = *buf_ptr++;
-		while (*buf_ptr >= '0' && *buf_ptr <= '8') {
-		    CHECK_SIZE_TOKEN;
-		    *e_token++ = *buf_ptr++;
-		}
-		break;
-	    case BASE_16:
-		*e_token++ = *buf_ptr++;
-		*e_token++ = *buf_ptr++;
-		while (isxdigit((unsigned char)*buf_ptr)) {
-		    CHECK_SIZE_TOKEN;
-		    *e_token++ = *buf_ptr++;
-		}
-		break;
-	    case BASE_10:
+	    else		/* base 10: */
 		while (1) {
 		    if (*buf_ptr == '.') {
 			if (seendot)
@ -230,7 +215,7 @@ lexi(struct parser_state *state)
 			else
 			    seendot++;
 		    }
-		    CHECK_SIZE_TOKEN;
+		    CHECK_SIZE_TOKEN(3);
 		    *e_token++ = *buf_ptr++;
 		    if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
 			if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
@ -238,24 +223,21 @@ lexi(struct parser_state *state)
 			else {
 			    seenexp++;
 			    seendot++;
-			    CHECK_SIZE_TOKEN;
 			    *e_token++ = *buf_ptr++;
 			    if (*buf_ptr == '+' || *buf_ptr == '-')
 				*e_token++ = *buf_ptr++;
 			}
 		    }
 		}
-		break;
-	    }
+
 	    while (1) {
+		CHECK_SIZE_TOKEN(2);
 		if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
-		    CHECK_SIZE_TOKEN;
 		    *e_token++ = *buf_ptr++;
 		    seensfx |= 1;
 		    continue;
 		}
 		if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
-		    CHECK_SIZE_TOKEN;
 		    if (buf_ptr[1] == buf_ptr[0])
 		        *e_token++ = *buf_ptr++;
 		    *e_token++ = *buf_ptr++;
@ -276,13 +258,13 @@ lexi(struct parser_state *state)
 			} else
 			    break;
 		}
-		CHECK_SIZE_TOKEN;
+		CHECK_SIZE_TOKEN(1);
 		/* copy it over */
 		*e_token++ = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
 	    }
-	*e_token++ = '\0';
+	*e_token = '\0';

 	if (s_token[0] == 'L' && s_token[1] == '\0' &&
 	      (*buf_ptr == '"' || *buf_ptr == '\''))
@ -397,6 +379,7 @@ lexi(struct parser_state *state)

    /* Scan a non-alphanumeric token */

+    CHECK_SIZE_TOKEN(3);		/* things like "<<=" */
    *e_token++ = *buf_ptr;		/* if it is only a one-character token, it is
 				 * moved here */
    *e_token = '\0';
@ -424,9 +407,7 @@ lexi(struct parser_state *state)
 		    diag2(1, "Unterminated literal");
 		    goto stop_lit;
 		}
-		CHECK_SIZE_TOKEN;	/* Only have to do this once in this loop,
-					 * since CHECK_SIZE guarantees that there
-					 * are at least 5 entries left */
+		CHECK_SIZE_TOKEN(2);
 		*e_token = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
@ -585,8 +566,10 @@ lexi(struct parser_state *state)
 	    break;
 	}
 	while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
-	    if (*buf_ptr == '*')
+	    if (*buf_ptr == '*') {
+		CHECK_SIZE_TOKEN(1);
 		*e_token++ = *buf_ptr;
+	    }
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
 	}
@ -620,6 +603,7 @@ lexi(struct parser_state *state)
 	    /*
 	     * handle ||, &&, etc, and also things as in int *****i
 	     */
+	    CHECK_SIZE_TOKEN(1);
 	    *e_token++ = *buf_ptr;
 	    if (++buf_ptr >= buf_end)
 		fill_buffer();
@ -632,6 +616,7 @@ lexi(struct parser_state *state)
    if (buf_ptr >= buf_end)	/* check for input buffer empty */
 	fill_buffer();
    state->last_u_d = unary_delim;
+    CHECK_SIZE_TOKEN(1);
    *e_token = '\0';		/* null terminate the token */
    return (code);
 }
--- a/usr.bin/indent/pr_comment.c
+++ b/usr.bin/indent/pr_comment.c
@ -205,9 +205,9 @@ pr_comment(void)

    while (1) {			/* this loop will go until the comment is
 				 * copied */
-	CHECK_SIZE_COM;
 	switch (*buf_ptr) {	/* this checks for various spcl cases */
 	case 014:		/* check for a form feed */
+	    CHECK_SIZE_COM(3);
 	    if (!ps.box_com) {	/* in a text comment, break the line here */
 		ps.use_ff = true;
 		/* fix so dump_line uses a form feed */
@ -232,6 +232,7 @@ pr_comment(void)
 		return;
 	    }
 	    last_bl = NULL;
+	    CHECK_SIZE_COM(4);
 	    if (ps.box_com || ps.last_nl) {	/* if this is a boxed comment,
 						 * we dont ignore the newline */
 		if (s_com == e_com)
@ -255,7 +256,6 @@ pr_comment(void)
 		 */
 		else {		/* otherwise, insert one */
 		    last_bl = e_com;
-		    CHECK_SIZE_COM;
 		    *e_com++ = ' ';
 		}
 	    }
@ -282,12 +282,11 @@ pr_comment(void)
 				 * of comment */
 	    if (++buf_ptr >= buf_end)	/* get to next char after * */
 		fill_buffer();
-
+	    CHECK_SIZE_COM(4);
 	    if (*buf_ptr == '/') {	/* it is the end!!! */
 	end_of_comment:
 		if (++buf_ptr >= buf_end)
 		    fill_buffer();
-		CHECK_SIZE_COM;
 		if (break_delim) {
 		    if (e_com > s_com + 3) {
 			dump_line();
@ -308,6 +307,7 @@ pr_comment(void)
 	default:		/* we have a random char */
 	    now_col = count_spaces_until(ps.com_col, s_com, e_com);
 	    do {
+		CHECK_SIZE_COM(1);
 		*e_com = *buf_ptr++;
 		if (buf_ptr >= buf_end)
 		    fill_buffer();
@ -337,11 +337,16 @@ pr_comment(void)
 		    t_ptr++)
 			;
 		last_bl = NULL;
+		/*
+		 * t_ptr will be somewhere between e_com (dump_line() reset)
+		 * and l_com. So it's safe to copy byte by byte from t_ptr
+		 * to e_com without any CHECK_SIZE_COM().
+		 */
 		while (*t_ptr != '\0') {
 		    if (*t_ptr == ' ' || *t_ptr == '\t')
 			last_bl = e_com;
 		    *e_com++ = *t_ptr++;
- 		}
+		}
 	    }
 	    break;
 	}