freebsd-nq/contrib/ntp/ntpd/ntp_scanner.c
Cy Schubert 2b15cb3d09 MFV ntp 4.2.8p1 (r258945, r275970, r276091, r276092, r276093, r278284)
Thanks to roberto for providing pointers to wedge this into HEAD.

Approved by:	roberto
2015-03-30 13:30:15 +00:00

756 lines
15 KiB
C

/* ntp_scanner.c
*
* The source code for a simple lexical analyzer.
*
* Written By: Sachin Kamboj
* University of Delaware
* Newark, DE 19711
* Copyright (c) 2006
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include "ntpd.h"
#include "ntp_config.h"
#include "ntpsim.h"
#include "ntp_scanner.h"
#include "ntp_parser.h"
/* ntp_keyword.h declares finite state machine and token text */
#include "ntp_keyword.h"
/* SCANNER GLOBAL VARIABLES
* ------------------------
*/
/*
 * Size of the lexeme buffer: yylex() accepts lexemes of up to 1024
 * characters, plus one byte for the terminating NUL.
 */
#define MAX_LEXEME (1024 + 1)
/* Text of the lexeme most recently scanned by yylex(), NUL-terminated */
char yytext[MAX_LEXEME];
/*
 * Running sum of the characters read from the configuration file:
 * FGETC() adds each character it returns, UNGETC() subtracts each
 * character pushed back.
 */
u_int32 conf_file_sum;
/* CONSTANTS
* ---------
*/
/*
 * Characters that form a token on their own.  yylex() returns such a
 * character as its own token value while it is expecting a token
 * (FOLLBY_TOKEN), and quote_if_needed() quotes any string containing
 * one of them.
 */
const char special_chars[] = "{}(),;|=";
/* FUNCTIONS
* ---------
*/
/* Forward declaration; the keyword matcher is defined further down. */
static int is_keyword(char *lexeme, follby *pfollowedby);
/*
 * keyword() - map a T_ token identifier to its configuration keyword.
 *
 * token	a T_ token value
 *
 * Returns the keyword text for the token, or the fixed string
 * "(keyword not found)" when the token lies outside the keyword table
 * or its table entry is NULL.  token_name() yields the string-ized
 * T_ identifier instead.
 * Example:	keyword(T_Server) returns "server"
 *		token_name(T_Server) returns "T_Server"
 */
const char *
keyword(
	int token
	)
{
	/*
	 * Tokens below LOWEST_KEYWORD_ID wrap to a huge unsigned index
	 * and are therefore rejected by the bounds test as well.
	 */
	const size_t idx = token - LOWEST_KEYWORD_ID;

	if (idx >= COUNTOF(keyword_text) || NULL == keyword_text[idx])
		return "(keyword not found)";

	return keyword_text[idx];
}
/* FILE INTERFACE
* --------------
* We define a couple of wrapper functions around the standard C fgetc
* and ungetc functions in order to include positional bookkeeping
*/
/*
 * F_OPEN() - open a file and wrap it in a freshly allocated FILE_INFO.
 *
 * path		name of the file to open; the pointer itself is stored
 *		in the FILE_INFO (the string is not copied), so it has
 *		to stay valid for as long as the FILE_INFO is in use
 * mode		fopen() mode string
 *
 * Returns the new FILE_INFO positioned at line 1, column 0, or NULL
 * when fopen() fails (errno is left as fopen() set it, since free()
 * is the only call in between).  Release the result with FCLOSE().
 */
struct FILE_INFO *
F_OPEN(
	const char *path,
	const char *mode
	)
{
	struct FILE_INFO *my_info;

	/* no NULL check: emalloc() is presumably fatal on failure */
	my_info = emalloc(sizeof *my_info);

	my_info->line_no = 1;
	my_info->col_no = 0;
	my_info->prev_line_col_no = 0;
	/*
	 * yylex() can return a token (EOC or a special character)
	 * before it records the token position for the first time, so
	 * give the token line a defined starting value rather than
	 * leaving whatever the allocator handed back.
	 */
	my_info->prev_token_line_no = 1;
	my_info->prev_token_col_no = 0;
	my_info->fname = path;

	my_info->fd = fopen(path, mode);
	if (NULL == my_info->fd) {
		free(my_info);
		return NULL;
	}

	return my_info;
}
int
FGETC(
struct FILE_INFO *stream
)
{
int ch;
do
ch = fgetc(stream->fd);
while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX));
if (EOF != ch) {
if (input_from_file)
conf_file_sum += (u_char)ch;
++stream->col_no;
if (ch == '\n') {
stream->prev_line_col_no = stream->col_no;
++stream->line_no;
stream->col_no = 1;
}
}
return ch;
}
/*
 * UNGETC() - ungetc() wrapper that undoes the bookkeeping of FGETC().
 *
 * Subtracts ch from conf_file_sum while input_from_file is set, steps
 * the column back by one and, for a newline, first restores the line
 * number and the column saved for the previous line.
 *
 * Returns whatever ungetc() returns.
 *
 * BUGS: 1. Function will fail on more than one line of pushback
 *       2. No error checking is done to see if ungetc fails
 * SK: I don't think it's worth fixing these bugs for our purposes ;-)
 */
int
UNGETC(
	int ch,
	struct FILE_INFO *stream
	)
{
	const int was_newline = ('\n' == ch);

	if (input_from_file)
		conf_file_sum -= (u_char)ch;

	if (was_newline) {
		/* only one saved line end is kept; mark it consumed */
		stream->col_no = stream->prev_line_col_no;
		stream->prev_line_col_no = -1;
		stream->line_no -= 1;
	}
	stream->col_no -= 1;

	return ungetc(ch, stream->fd);
}
/*
 * FCLOSE() - close the file behind a FILE_INFO and release the
 * FILE_INFO itself.
 *
 * Returns the fclose() result (0 on success, EOF on failure).
 *
 * The FILE_INFO is freed even when fclose() reports an error: the
 * underlying stream is unusable after fclose() whether or not the
 * call succeeded, so keeping the wrapper around could only leak it.
 * The caller must not touch 'stream' after this call.
 */
int
FCLOSE(
	struct FILE_INFO *stream
	)
{
	int ret_val;

	ret_val = fclose(stream->fd);
	free(stream);

	return ret_val;
}
/* STREAM INTERFACE
* ----------------
* Provide a wrapper for the stream functions so that the
* stream can either read from a file or from a character
* array.
* NOTE: This is not very efficient for reading from character
* arrays, but needed to allow remote configuration where the
* configuration command is provided through ntpq.
*
* The behavior of these two functions is determined by the
* input_from_file flag.
*/
/*
 * get_next_char() - fetch the next input character.
 *
 * With input_from_file set the character comes from FGETC(ip_file);
 * otherwise it is taken from remote_config.buffer at
 * remote_config.pos, with the same line/column bookkeeping applied to
 * ip_file.
 *
 * Returns the character as a non-negative int, or EOF at end of input
 * (for the buffer: when the terminating NUL is reached).
 */
static int
get_next_char(
	struct FILE_INFO *ip_file
	)
{
	int ch;

	if (input_from_file)
		return FGETC(ip_file);

	if ('\0' == remote_config.buffer[remote_config.pos])
		return EOF;

	/*
	 * Convert through unsigned char, as fgetc() does.  Returning a
	 * plain (possibly signed) char would turn byte 0xFF into EOF
	 * and hand negative values to the <ctype.h> tests in yylex().
	 */
	ch = (unsigned char)remote_config.buffer[remote_config.pos++];

	ip_file->col_no++;
	if ('\n' == ch) {
		ip_file->prev_line_col_no = ip_file->col_no;
		++ip_file->line_no;
		ip_file->col_no = 1;
	}

	return ch;
}
/*
 * push_back_char() - return one character to the input.
 *
 * File input is delegated to UNGETC().  For input from the remote
 * configuration buffer the read position is simply stepped back by
 * one and the line/column bookkeeping is reverted; 'ch' is only used
 * to recognise a newline and is not written into the buffer.
 */
static void
push_back_char(
	struct FILE_INFO *ip_file,
	int ch
	)
{
	if (input_from_file) {
		UNGETC(ch, ip_file);
		return;
	}

	if ('\n' == ch) {
		ip_file->col_no = ip_file->prev_line_col_no;
		ip_file->prev_line_col_no = -1;
		ip_file->line_no -= 1;
	}
	ip_file->col_no -= 1;
	remote_config.pos -= 1;
}
/* STATE MACHINES
* --------------
*/
/*
 * is_keyword() - match a lexeme against the keyword state table sst[].
 *
 * lexeme	NUL-terminated text to look up
 * pfollowedby	receives the "followed by" attribute of the matched
 *		keyword; left untouched when nothing matches
 *
 * Returns the index of the accepting state, which doubles as the
 * token value, or 0 when the lexeme is not a keyword.
 */
static int
is_keyword(
	char *lexeme,
	follby *pfollowedby
	)
{
	const char *cp;
	int state;	/* current state index, 0 means "no state" */

	state = SCANNER_INIT_S;

	for (cp = lexeme; '\0' != *cp; cp++) {
		/* try the alternatives until one carries this character */
		while (state != 0 && *cp != SS_CH(sst[state]))
			state = SS_OTHER_N(sst[state]);

		/* ran out of alternatives: not a keyword */
		if (0 == state)
			return 0;

		/* last character of the lexeme on an accepting state */
		if ('\0' == cp[1]
		    && FOLLBY_NON_ACCEPTING != SS_FB(sst[state])) {
			*pfollowedby = SS_FB(sst[state]);
			return state;
		}

		state = SS_MATCH_N(sst[state]);
	}

	/* empty lexeme, or it ended on a non-accepting state */
	return 0;
}
/*
 * is_integer() - test whether a lexeme is a decimal integer that fits
 * in a signed int.
 *
 * Accepted form: an optional leading '-' followed by one or more
 * decimal digits and nothing else.  A lone "-" and the empty string
 * are not integers.
 *
 * Values outside INT_MIN..INT_MAX are rejected so that the caller can
 * go on to classify them as an unsigned integer or a double.
 *
 * Returns 1 if the lexeme qualifies, 0 otherwise.  errno is preserved.
 */
static int
is_integer(
	char *lexeme
	)
{
	const char *	digits;
	const char *	scan;
	long		val;
	int		saved_errno;
	int		in_range;

	/* Allow a leading minus sign */
	digits = lexeme;
	if ('-' == *digits)
		digits++;

	/* At least one digit is required ... */
	if ('\0' == *digits)
		return 0;

	/* ... and all the remaining characters must be digits */
	for (scan = digits; '\0' != *scan; scan++) {
		if (!isdigit((unsigned char)*scan))
			return 0;
	}

	/*
	 * strtol() reports overflow through ERANGE, which gives a
	 * well-defined range test for arbitrarily long digit strings.
	 */
	saved_errno = errno;
	errno = 0;
	val = strtol(lexeme, NULL, 10);
	in_range = (ERANGE != errno && val >= INT_MIN && val <= INT_MAX);
	errno = saved_errno;

	return in_range;
}
/*
 * is_u_int() - test whether a lexeme is an unsigned integer literal.
 * Assumes is_integer() has already returned false for the lexeme.
 *
 * Accepted forms: one or more decimal digits, or "0x"/"0X" followed
 * by one or more hexadecimal digits.  A bare "0x" and the empty
 * string carry no digits and are rejected.  The value range is not
 * checked here.
 *
 * Returns 1 if the lexeme qualifies, 0 otherwise.
 */
static int
is_u_int(
	char *lexeme
	)
{
	const char *	scan;
	int		is_hex;

	scan = lexeme;
	is_hex = 0;
	if ('0' == scan[0] && 'x' == tolower((unsigned char)scan[1])) {
		scan += 2;
		is_hex = 1;
	}

	/* At least one digit is required */
	if ('\0' == *scan)
		return 0;

	/* Check that all the remaining characters are digits */
	for (; '\0' != *scan; scan++) {
		if (is_hex) {
			if (!isxdigit((unsigned char)*scan))
				return 0;
		} else if (!isdigit((unsigned char)*scan)) {
			return 0;
		}
	}

	return 1;
}
/*
 * is_double() - test whether a lexeme is a floating point literal.
 *
 * Accepted form:
 *	[+-] digits [. digits] [ (e|E) [+-] digits ]
 * where the integer and fraction parts together must contain at least
 * one digit, and an exponent, when present, must contain at least one
 * digit ("1e" and "1e+" are rejected).
 *
 * Returns 1 if the lexeme qualifies, 0 otherwise.
 */
static int
is_double(
	char *lexeme
	)
{
	const char *	scan;
	int		have_digits;	/* digit seen in the mantissa */

	scan = lexeme;
	have_digits = 0;

	/* Check for an optional '+' or '-' */
	if ('+' == *scan || '-' == *scan)
		scan++;

	/* Read the integer part; isdigit() is false for the NUL */
	for (; isdigit((unsigned char)*scan); scan++)
		have_digits = 1;

	/* Check for the optional decimal point and fraction digits */
	if ('.' == *scan) {
		scan++;
		for (; isdigit((unsigned char)*scan); scan++)
			have_digits = 1;
	}

	/*
	 * The integer part and the fraction part must not both be
	 * empty at this point
	 */
	if (!have_digits)
		return 0;

	/* Check if we are done */
	if ('\0' == *scan)
		return 1;

	/* There is still more input, it has to be an exponent */
	if ('e' != tolower((unsigned char)*scan))
		return 0;
	scan++;

	/* Read an optional sign */
	if ('+' == *scan || '-' == *scan)
		scan++;

	/* The exponent needs at least one digit */
	if (!isdigit((unsigned char)*scan))
		return 0;
	while (isdigit((unsigned char)*scan))
		scan++;

	/* Anything left over disqualifies the lexeme */
	return '\0' == *scan;
}
/*
 * is_special() - test whether a character is one of special_chars,
 * i.e. a character that forms a token by itself.
 *
 * Returns non-zero for a special character, 0 otherwise.
 *
 * strchr() treats the terminating NUL as part of the string, so a
 * value that converts to '\0' has to be excluded explicitly; without
 * that a NUL byte in the input would be reported as special.
 */
static inline int
is_special(
	int ch
	)
{
	return '\0' != (char)ch && NULL != strchr(special_chars, ch);
}
/*
 * is_EOC() - test whether a character ends the current command.
 *
 * The terminator is a newline while old_config_style is set and a
 * semicolon otherwise.
 *
 * Returns 1 for the end-of-command character, 0 otherwise.
 */
static int
is_EOC(
	int ch
	)
{
	const int terminator = old_config_style ? '\n' : ';';

	return ch == terminator;
}
/*
 * quote_if_needed() - copy a string, wrapping it in double quotes
 * when it contains a space or any of special_chars.
 *
 * A string that already begins with a double quote is copied
 * unchanged.
 *
 * Returns a buffer obtained from emalloc(); presumably the caller is
 * expected to free it.
 */
char *
quote_if_needed(char *str)
{
	const size_t	len = strlen(str);
	const size_t	octets = len + 2 + 1;	/* two quotes and the NUL */
	char *		ret = emalloc(octets);
	const int	has_special = strcspn(str, special_chars) < len;
	const int	has_space = (NULL != strchr(str, ' '));

	if ('"' == str[0] || !(has_special || has_space))
		strlcpy(ret, str, octets);
	else
		snprintf(ret, octets, "\"%s\"", str);

	return ret;
}
/*
 * create_string_token() - build the token for a string lexeme.
 *
 * A lexeme that is empty or consists of whitespace only (end of line
 * whitespace) yields T_EOC, stored in yylval.Integer.  Any other
 * lexeme is duplicated with estrdup() into yylval.String and yields
 * T_String.
 */
static int
create_string_token(
	char *lexeme
	)
{
	const char *scan;

	for (scan = lexeme; '\0' != *scan; scan++) {
		if (!isspace((unsigned char)*scan)) {
			/* real content: hand back a copy of the lexeme */
			yylval.String = estrdup(lexeme);
			return T_String;
		}
	}

	/* nothing but whitespace */
	yylval.Integer = T_EOC;
	return yylval.Integer;
}
/*
* yylex() - function that does the actual scanning.
* Bison expects this function to be called yylex and for it to take no
* input and return an int.
* Conceptually yylex "returns" yylval as well as the actual return
* value representing the token or type.
*/
int
yylex(
struct FILE_INFO *ip_file
)
{
static follby followedby = FOLLBY_TOKEN;
size_t i;
int instring;
int yylval_was_set;
int converted;
int token; /* The return value */
int ch;
if (input_from_file)
ip_file = fp[curr_include_level];
instring = FALSE;
yylval_was_set = FALSE;
do {
/* Ignore whitespace at the beginning */
while (EOF != (ch = get_next_char(ip_file)) &&
isspace(ch) &&
!is_EOC(ch))
; /* Null Statement */
if (EOF == ch) {
if (!input_from_file || curr_include_level <= 0)
return 0;
FCLOSE(fp[curr_include_level]);
ip_file = fp[--curr_include_level];
token = T_EOC;
goto normal_return;
} else if (is_EOC(ch)) {
/* end FOLLBY_STRINGS_TO_EOC effect */
followedby = FOLLBY_TOKEN;
token = T_EOC;
goto normal_return;
} else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
/* special chars are their own token values */
token = ch;
/*
* '=' outside simulator configuration implies
* a single string following as in:
* setvar Owner = "The Boss" default
*/
if ('=' == ch && old_config_style)
followedby = FOLLBY_STRING;
yytext[0] = (char)ch;
yytext[1] = '\0';
goto normal_return;
} else
push_back_char(ip_file, ch);
/* save the position of start of the token */
ip_file->prev_token_line_no = ip_file->line_no;
ip_file->prev_token_col_no = ip_file->col_no;
/* Read in the lexeme */
i = 0;
while (EOF != (ch = get_next_char(ip_file))) {
yytext[i] = (char)ch;
/* Break on whitespace or a special character */
if (isspace(ch) || is_EOC(ch)
|| '"' == ch
|| (FOLLBY_TOKEN == followedby
&& is_special(ch)))
break;
/* Read the rest of the line on reading a start
of comment character */
if ('#' == ch) {
while (EOF != (ch = get_next_char(ip_file))
&& '\n' != ch)
; /* Null Statement */
break;
}
i++;
if (i >= COUNTOF(yytext))
goto lex_too_long;
}
/* Pick up all of the string inside between " marks, to
* end of line. If we make it to EOL without a
* terminating " assume it for them.
*
* XXX - HMS: I'm not sure we want to assume the closing "
*/
if ('"' == ch) {
instring = TRUE;
while (EOF != (ch = get_next_char(ip_file)) &&
ch != '"' && ch != '\n') {
yytext[i++] = (char)ch;
if (i >= COUNTOF(yytext))
goto lex_too_long;
}
/*
* yytext[i] will be pushed back as not part of
* this lexeme, but any closing quote should
* not be pushed back, so we read another char.
*/
if ('"' == ch)
ch = get_next_char(ip_file);
}
/* Pushback the last character read that is not a part
* of this lexeme.
* If the last character read was an EOF, pushback a
* newline character. This is to prevent a parse error
* when there is no newline at the end of a file.
*/
if (EOF == ch)
push_back_char(ip_file, '\n');
else
push_back_char(ip_file, ch);
yytext[i] = '\0';
} while (i == 0);
/* Now return the desired token */
/* First make sure that the parser is *not* expecting a string
* as the next token (based on the previous token that was
* returned) and that we haven't read a string.
*/
if (followedby == FOLLBY_TOKEN && !instring) {
token = is_keyword(yytext, &followedby);
if (token) {
/*
* T_Server is exceptional as it forces the
* following token to be a string in the
* non-simulator parts of the configuration,
* but in the simulator configuration section,
* "server" is followed by "=" which must be
* recognized as a token not a string.
*/
if (T_Server == token && !old_config_style)
followedby = FOLLBY_TOKEN;
goto normal_return;
} else if (is_integer(yytext)) {
yylval_was_set = TRUE;
errno = 0;
if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
&& ((errno == EINVAL) || (errno == ERANGE))) {
msyslog(LOG_ERR,
"Integer cannot be represented: %s",
yytext);
if (input_from_file) {
exit(1);
} else {
/* force end of parsing */
yylval.Integer = 0;
return 0;
}
}
token = T_Integer;
goto normal_return;
} else if (is_u_int(yytext)) {
yylval_was_set = TRUE;
if ('0' == yytext[0] &&
'x' == tolower((unsigned char)yytext[1]))
converted = sscanf(&yytext[2], "%x",
&yylval.U_int);
else
converted = sscanf(yytext, "%u",
&yylval.U_int);
if (1 != converted) {
msyslog(LOG_ERR,
"U_int cannot be represented: %s",
yytext);
if (input_from_file) {
exit(1);
} else {
/* force end of parsing */
yylval.Integer = 0;
return 0;
}
}
token = T_U_int;
goto normal_return;
} else if (is_double(yytext)) {
yylval_was_set = TRUE;
errno = 0;
if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
msyslog(LOG_ERR,
"Double too large to represent: %s",
yytext);
exit(1);
} else {
token = T_Double;
goto normal_return;
}
} else {
/* Default: Everything is a string */
yylval_was_set = TRUE;
token = create_string_token(yytext);
goto normal_return;
}
}
/*
* Either followedby is not FOLLBY_TOKEN or this lexeme is part
* of a string. Hence, we need to return T_String.
*
* _Except_ we might have a -4 or -6 flag on a an association
* configuration line (server, peer, pool, etc.).
*
* This is a terrible hack, but the grammar is ambiguous so we
* don't have a choice. [SK]
*
* The ambiguity is in the keyword scanner, not ntp_parser.y.
* We do not require server addresses be quoted in ntp.conf,
* complicating the scanner's job. To avoid trying (and
* failing) to match an IP address or DNS name to a keyword,
* the association keywords use FOLLBY_STRING in the keyword
* table, which tells the scanner to force the next token to be
* a T_String, so it does not try to match a keyword but rather
* expects a string when -4/-6 modifiers to server, peer, etc.
* are encountered.
* restrict -4 and restrict -6 parsing works correctly without
* this hack, as restrict uses FOLLBY_TOKEN. [DH]
*/
if ('-' == yytext[0]) {
if ('4' == yytext[1]) {
token = T_Ipv4_flag;
goto normal_return;
} else if ('6' == yytext[1]) {
token = T_Ipv6_flag;
goto normal_return;
}
}
instring = FALSE;
if (FOLLBY_STRING == followedby)
followedby = FOLLBY_TOKEN;
yylval_was_set = TRUE;
token = create_string_token(yytext);
normal_return:
if (T_EOC == token)
DPRINTF(4,("\t<end of command>\n"));
else
DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
token_name(token)));
if (!yylval_was_set)
yylval.Integer = token;
return token;
lex_too_long:
yytext[min(sizeof(yytext) - 1, 50)] = 0;
msyslog(LOG_ERR,
"configuration item on line %d longer than limit of %lu, began with '%s'",
ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50),
yytext);
/*
* If we hit the length limit reading the startup configuration
* file, abort.
*/
if (input_from_file)
exit(sizeof(yytext) - 1);
/*
* If it's runtime configuration via ntpq :config treat it as
* if the configuration text ended before the too-long lexeme,
* hostname, or string.
*/
yylval.Integer = 0;
return 0;
}