freebsd-dev/contrib/tcl/generic/tclParse.c
1997-10-01 13:19:13 +00:00

939 lines
24 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* tclParse.c --
*
* This file contains a collection of procedures that are used
* to parse Tcl commands or parts of commands (like quoted
* strings or nested sub-commands).
*
* Copyright (c) 1987-1993 The Regents of the University of California.
* Copyright (c) 1994-1997 Sun Microsystems, Inc.
*
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
* SCCS: @(#) tclParse.c 1.56 97/07/29 18:40:03
*/
#include "tclInt.h"
#include "tclPort.h"
/*
* Function prototypes for procedures local to this file:
*/
static char * QuoteEnd _ANSI_ARGS_((char *string, char *lastChar,
int term));
static char * ScriptEnd _ANSI_ARGS_((char *p, char *lastChar,
int nested));
static char * VarNameEnd _ANSI_ARGS_((char *string, char *lastChar));
/*
*--------------------------------------------------------------
*
* TclParseQuotes --
*
* This procedure parses a double-quoted string such as a
* quoted Tcl command argument or a quoted value in a Tcl
* expression. This procedure is also used to parse array
* element names within parentheses, or anything else that
* needs all the substitutions that happen in quotes.
*
* Results:
* The return value is a standard Tcl result, which is
* TCL_OK unless there was an error while parsing the
* quoted string. If an error occurs then interp->result
* contains a standard error message. *TermPtr is filled
* in with the address of the character just after the
* last one successfully processed; this is usually the
* character just after the matching close-quote. The
* fully-substituted contents of the quotes are stored in
* standard fashion in *pvPtr, null-terminated with
* pvPtr->next pointing to the terminating null character.
*
* Side effects:
* The buffer space in pvPtr may be enlarged by calling its
* expandProc.
*
*--------------------------------------------------------------
*/
int
TclParseQuotes(interp, string, termChar, flags, termPtr, pvPtr)
Tcl_Interp *interp; /* Interpreter to use for nested command
* evaluations and error messages. */
char *string; /* Character just after opening double-
* quote. */
int termChar; /* Character that terminates "quoted" string
* (usually double-quote, but sometimes
* right-paren or something else). */
int flags; /* Flags to pass to nested Tcl_Eval calls. */
char **termPtr; /* Store address of terminating character
* here. */
ParseValue *pvPtr; /* Information about where to place
* fully-substituted result of parse. */
{
register char *src, *dst, c;
char *lastChar = string + strlen(string);
src = string;
dst = pvPtr->next;
while (1) {
if (dst == pvPtr->end) {
/*
* Target buffer space is about to run out. Make more space.
*/
pvPtr->next = dst;
(*pvPtr->expandProc)(pvPtr, 1);
dst = pvPtr->next;
}
c = *src;
src++;
if (c == termChar) {
*dst = '\0';
pvPtr->next = dst;
*termPtr = src;
return TCL_OK;
} else if (CHAR_TYPE(src-1, lastChar) == TCL_NORMAL) {
copy:
*dst = c;
dst++;
continue;
} else if (c == '$') {
int length;
char *value;
value = Tcl_ParseVar(interp, src-1, termPtr);
if (value == NULL) {
return TCL_ERROR;
}
src = *termPtr;
length = strlen(value);
if ((pvPtr->end - dst) <= length) {
pvPtr->next = dst;
(*pvPtr->expandProc)(pvPtr, length);
dst = pvPtr->next;
}
strcpy(dst, value);
dst += length;
continue;
} else if (c == '[') {
int result;
pvPtr->next = dst;
result = TclParseNestedCmd(interp, src, flags, termPtr, pvPtr);
if (result != TCL_OK) {
return result;
}
src = *termPtr;
dst = pvPtr->next;
continue;
} else if (c == '\\') {
int numRead;
src--;
*dst = Tcl_Backslash(src, &numRead);
dst++;
src += numRead;
continue;
} else if (c == '\0') {
char buf[30];
Tcl_ResetResult(interp);
sprintf(buf, "missing %c", termChar);
Tcl_SetResult(interp, buf, TCL_VOLATILE);
*termPtr = string-1;
return TCL_ERROR;
} else {
goto copy;
}
}
}
/*
*--------------------------------------------------------------
*
* TclParseNestedCmd --
*
* This procedure parses a nested Tcl command between
* brackets, returning the result of the command.
*
* Results:
* The return value is a standard Tcl result, which is
* TCL_OK unless there was an error while executing the
* nested command. If an error occurs then interp->result
* contains a standard error message. *TermPtr is filled
* in with the address of the character just after the
* last one processed; this is usually the character just
* after the matching close-bracket, or the null character
* at the end of the string if the close-bracket was missing
* (a missing close bracket is an error). The result returned
* by the command is stored in standard fashion in *pvPtr,
* null-terminated, with pvPtr->next pointing to the null
* character.
*
* Side effects:
* The storage space at *pvPtr may be expanded.
*
*--------------------------------------------------------------
*/
int
TclParseNestedCmd(interp, string, flags, termPtr, pvPtr)
Tcl_Interp *interp; /* Interpreter to use for nested command
* evaluations and error messages. */
char *string; /* Character just after opening bracket. */
int flags; /* Flags to pass to nested Tcl_Eval. */
char **termPtr; /* Store address of terminating character
* here. */
register ParseValue *pvPtr; /* Information about where to place
* result of command. */
{
int result, length, shortfall;
Interp *iPtr = (Interp *) interp;
iPtr->evalFlags = flags | TCL_BRACKET_TERM;
result = Tcl_Eval(interp, string);
*termPtr = (string + iPtr->termOffset);
if (result != TCL_OK) {
/*
* The increment below results in slightly cleaner message in
* the errorInfo variable (the close-bracket will appear).
*/
if (**termPtr == ']') {
*termPtr += 1;
}
return result;
}
(*termPtr) += 1;
length = strlen(iPtr->result);
shortfall = length + 1 - (pvPtr->end - pvPtr->next);
if (shortfall > 0) {
(*pvPtr->expandProc)(pvPtr, shortfall);
}
strcpy(pvPtr->next, iPtr->result);
pvPtr->next += length;
Tcl_FreeResult(interp);
iPtr->result = iPtr->resultSpace;
iPtr->resultSpace[0] = '\0';
return TCL_OK;
}
/*
*--------------------------------------------------------------
*
* TclParseBraces --
*
* This procedure scans the information between matching
* curly braces.
*
* Results:
* The return value is a standard Tcl result, which is
* TCL_OK unless there was an error while parsing string.
* If an error occurs then interp->result contains a
* standard error message. *TermPtr is filled
* in with the address of the character just after the
* last one successfully processed; this is usually the
* character just after the matching close-brace. The
* information between curly braces is stored in standard
* fashion in *pvPtr, null-terminated with pvPtr->next
* pointing to the terminating null character.
*
* Side effects:
* The storage space at *pvPtr may be expanded.
*
*--------------------------------------------------------------
*/
int
TclParseBraces(interp, string, termPtr, pvPtr)
Tcl_Interp *interp; /* Interpreter to use for nested command
* evaluations and error messages. */
char *string; /* Character just after opening bracket. */
char **termPtr; /* Store address of terminating character
* here. */
register ParseValue *pvPtr; /* Information about where to place
* result of command. */
{
int level;
register char *src, *dst, *end;
register char c;
char *lastChar = string + strlen(string);
src = string;
dst = pvPtr->next;
end = pvPtr->end;
level = 1;
/*
* Copy the characters one at a time to the result area, stopping
* when the matching close-brace is found.
*/
while (1) {
c = *src;
src++;
if (dst == end) {
pvPtr->next = dst;
(*pvPtr->expandProc)(pvPtr, 20);
dst = pvPtr->next;
end = pvPtr->end;
}
*dst = c;
dst++;
if (CHAR_TYPE(src-1, lastChar) == TCL_NORMAL) {
continue;
} else if (c == '{') {
level++;
} else if (c == '}') {
level--;
if (level == 0) {
dst--; /* Don't copy the last close brace. */
break;
}
} else if (c == '\\') {
int count;
/*
* Must always squish out backslash-newlines, even when in
* braces. This is needed so that this sequence can appear
* anywhere in a command, such as the middle of an expression.
*/
if (*src == '\n') {
dst[-1] = Tcl_Backslash(src-1, &count);
src += count - 1;
} else {
(void) Tcl_Backslash(src-1, &count);
while (count > 1) {
if (dst == end) {
pvPtr->next = dst;
(*pvPtr->expandProc)(pvPtr, 20);
dst = pvPtr->next;
end = pvPtr->end;
}
*dst = *src;
dst++;
src++;
count--;
}
}
} else if (c == '\0') {
Tcl_SetResult(interp, "missing close-brace", TCL_STATIC);
*termPtr = string-1;
return TCL_ERROR;
}
}
*dst = '\0';
pvPtr->next = dst;
*termPtr = src;
return TCL_OK;
}
/*
*--------------------------------------------------------------
*
* TclExpandParseValue --
*
* This procedure is commonly used as the value of the
* expandProc in a ParseValue. It uses malloc to allocate
* more space for the result of a parse.
*
* Results:
* The buffer space in *pvPtr is reallocated to something
* larger, and if pvPtr->clientData is non-zero the old
* buffer is freed. Information is copied from the old
* buffer to the new one.
*
* Side effects:
* None.
*
*--------------------------------------------------------------
*/
void
TclExpandParseValue(pvPtr, needed)
register ParseValue *pvPtr; /* Information about buffer that
* must be expanded. If the clientData
* in the structure is non-zero, it
* means that the current buffer is
* dynamically allocated. */
int needed; /* Minimum amount of additional space
* to allocate. */
{
int newSpace;
char *new;
/*
* Either double the size of the buffer or add enough new space
* to meet the demand, whichever produces a larger new buffer.
*/
newSpace = (pvPtr->end - pvPtr->buffer) + 1;
if (newSpace < needed) {
newSpace += needed;
} else {
newSpace += newSpace;
}
new = (char *) ckalloc((unsigned) newSpace);
/*
* Copy from old buffer to new, free old buffer if needed, and
* mark new buffer as malloc-ed.
*/
memcpy((VOID *) new, (VOID *) pvPtr->buffer,
(size_t) (pvPtr->next - pvPtr->buffer));
pvPtr->next = new + (pvPtr->next - pvPtr->buffer);
if (pvPtr->clientData != 0) {
ckfree(pvPtr->buffer);
}
pvPtr->buffer = new;
pvPtr->end = new + newSpace - 1;
pvPtr->clientData = (ClientData) 1;
}
/*
*----------------------------------------------------------------------
*
* TclWordEnd --
*
* Given a pointer into a Tcl command, find the end of the next
* word of the command.
*
* Results:
* The return value is a pointer to the last character that's part
* of the word pointed to by "start". If the word doesn't end
* properly within the string then the return value is the address
* of the null character at the end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
char *
TclWordEnd(start, lastChar, nested, semiPtr)
char *start; /* Beginning of a word of a Tcl command. */
char *lastChar; /* Terminating character in string. */
int nested; /* Zero means this is a top-level command.
* One means this is a nested command (close
* bracket is a word terminator). */
int *semiPtr; /* Set to 1 if word ends with a command-
* terminating semi-colon, zero otherwise.
* If NULL then ignored. */
{
register char *p;
int count;
if (semiPtr != NULL) {
*semiPtr = 0;
}
/*
* Skip leading white space (backslash-newline must be treated like
* white-space, except that it better not be the last thing in the
* command).
*/
for (p = start; ; p++) {
if (isspace(UCHAR(*p))) {
continue;
}
if ((p[0] == '\\') && (p[1] == '\n')) {
if (p+2 == lastChar) {
return p+2;
}
continue;
}
break;
}
/*
* Handle words beginning with a double-quote or a brace.
*/
if (*p == '"') {
p = QuoteEnd(p+1, lastChar, '"');
if (p == lastChar) {
return p;
}
p++;
} else if (*p == '{') {
int braces = 1;
while (braces != 0) {
p++;
while (*p == '\\') {
(void) Tcl_Backslash(p, &count);
p += count;
}
if (*p == '}') {
braces--;
} else if (*p == '{') {
braces++;
} else if (p == lastChar) {
return p;
}
}
p++;
}
/*
* Handle words that don't start with a brace or double-quote.
* This code is also invoked if the word starts with a brace or
* double-quote and there is garbage after the closing brace or
* quote. This is an error as far as Tcl_Eval is concerned, but
* for here the garbage is treated as part of the word.
*/
while (1) {
if (*p == '[') {
p = ScriptEnd(p+1, lastChar, 1);
if (p == lastChar) {
return p;
}
p++;
} else if (*p == '\\') {
if (p[1] == '\n') {
/*
* Backslash-newline: it maps to a space character
* that is a word separator, so the word ends just before
* the backslash.
*/
return p-1;
}
(void) Tcl_Backslash(p, &count);
p += count;
} else if (*p == '$') {
p = VarNameEnd(p, lastChar);
if (p == lastChar) {
return p;
}
p++;
} else if (*p == ';') {
/*
* Include the semi-colon in the word that is returned.
*/
if (semiPtr != NULL) {
*semiPtr = 1;
}
return p;
} else if (isspace(UCHAR(*p))) {
return p-1;
} else if ((*p == ']') && nested) {
return p-1;
} else if (p == lastChar) {
if (nested) {
/*
* Nested commands can't end because of the end of the
* string.
*/
return p;
}
return p-1;
} else {
p++;
}
}
}
/*
*----------------------------------------------------------------------
*
* QuoteEnd --
*
* Given a pointer to a string that obeys the parsing conventions
* for quoted things in Tcl, find the end of that quoted thing.
* The actual thing may be a quoted argument or a parenthesized
* index name.
*
* Results:
* The return value is a pointer to the last character that is
* part of the quoted string (i.e the character that's equal to
* term). If the quoted string doesn't terminate properly then
* the return value is a pointer to the null character at the
* end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
static char *
QuoteEnd(string, lastChar, term)
char *string; /* Pointer to character just after opening
* "quote". */
char *lastChar; /* Terminating character in string. */
int term; /* This character will terminate the
* quoted string (e.g. '"' or ')'). */
{
register char *p = string;
int count;
while (*p != term) {
if (*p == '\\') {
(void) Tcl_Backslash(p, &count);
p += count;
} else if (*p == '[') {
for (p++; *p != ']'; p++) {
p = TclWordEnd(p, lastChar, 1, (int *) NULL);
if (*p == 0) {
return p;
}
}
p++;
} else if (*p == '$') {
p = VarNameEnd(p, lastChar);
if (*p == 0) {
return p;
}
p++;
} else if (p == lastChar) {
return p;
} else {
p++;
}
}
return p-1;
}
/*
*----------------------------------------------------------------------
*
* VarNameEnd --
*
* Given a pointer to a variable reference using $-notation, find
* the end of the variable name spec.
*
* Results:
* The return value is a pointer to the last character that
* is part of the variable name. If the variable name doesn't
* terminate properly then the return value is a pointer to the
* null character at the end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
static char *
VarNameEnd(string, lastChar)
char *string; /* Pointer to dollar-sign character. */
char *lastChar; /* Terminating character in string. */
{
register char *p = string+1;
if (*p == '{') {
for (p++; (*p != '}') && (p != lastChar); p++) {
/* Empty loop body. */
}
return p;
}
while (isalnum(UCHAR(*p)) || (*p == '_')) {
p++;
}
if ((*p == '(') && (p != string+1)) {
return QuoteEnd(p+1, lastChar, ')');
}
return p-1;
}
/*
*----------------------------------------------------------------------
*
* ScriptEnd --
*
* Given a pointer to the beginning of a Tcl script, find the end of
* the script.
*
* Results:
* The return value is a pointer to the last character that's part
* of the script pointed to by "p". If the command doesn't end
* properly within the string then the return value is the address
* of the null character at the end of the string.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
static char *
ScriptEnd(p, lastChar, nested)
char *p; /* Script to check. */
char *lastChar; /* Terminating character in string. */
int nested; /* Zero means this is a top-level command.
* One means this is a nested command (the
* last character of the script must be
* an unquoted ]). */
{
int commentOK = 1;
int length;
while (1) {
while (isspace(UCHAR(*p))) {
if (*p == '\n') {
commentOK = 1;
}
p++;
}
if ((*p == '#') && commentOK) {
do {
if (*p == '\\') {
/*
* If the script ends with backslash-newline, then
* this command isn't complete.
*/
if ((p[1] == '\n') && (p+2 == lastChar)) {
return p+2;
}
Tcl_Backslash(p, &length);
p += length;
} else {
p++;
}
} while ((p != lastChar) && (*p != '\n'));
continue;
}
p = TclWordEnd(p, lastChar, nested, &commentOK);
if (p == lastChar) {
return p;
}
p++;
if (nested) {
if (*p == ']') {
return p;
}
} else {
if (p == lastChar) {
return p-1;
}
}
}
}
/*
*----------------------------------------------------------------------
*
* Tcl_ParseVar --
*
* Given a string starting with a $ sign, parse off a variable
* name and return its value.
*
* Results:
* The return value is the contents of the variable given by
* the leading characters of string. If termPtr isn't NULL,
* *termPtr gets filled in with the address of the character
* just after the last one in the variable specifier. If the
* variable doesn't exist, then the return value is NULL and
* an error message will be left in interp->result.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
char *
Tcl_ParseVar(interp, string, termPtr)
Tcl_Interp *interp; /* Context for looking up variable. */
register char *string; /* String containing variable name.
* First character must be "$". */
char **termPtr; /* If non-NULL, points to word to fill
* in with character just after last
* one in the variable specifier. */
{
char *name1, *name1End, c, *result;
register char *name2;
#define NUM_CHARS 200
char copyStorage[NUM_CHARS];
ParseValue pv;
/*
* There are three cases:
* 1. The $ sign is followed by an open curly brace. Then the variable
* name is everything up to the next close curly brace, and the
* variable is a scalar variable.
* 2. The $ sign is not followed by an open curly brace. Then the
* variable name is everything up to the next character that isn't
* a letter, digit, or underscore, or a "::" namespace separator.
* If the following character is an open parenthesis, then the
* information between parentheses is the array element name, which
* can include any of the substitutions permissible between quotes.
* 3. The $ sign is followed by something that isn't a letter, digit,
* underscore, or a "::" namespace separator: in this case,
* there is no variable name, and "$" is returned.
*/
name2 = NULL;
string++;
if (*string == '{') {
string++;
name1 = string;
while (*string != '}') {
if (*string == 0) {
Tcl_SetResult(interp, "missing close-brace for variable name",
TCL_STATIC);
if (termPtr != 0) {
*termPtr = string;
}
return NULL;
}
string++;
}
name1End = string;
string++;
} else {
name1 = string;
while (isalnum(UCHAR(*string)) || (*string == '_')
|| (*string == ':')) {
if (*string == ':') {
if (*(string+1) == ':') {
string += 2; /* skip over the initial :: */
while (*string == ':') {
string++; /* skip over a subsequent : */
}
} else {
break; /* : by itself */
}
} else {
string++;
}
}
if (string == name1) {
if (termPtr != 0) {
*termPtr = string;
}
return "$";
}
name1End = string;
if (*string == '(') {
char *end;
/*
* Perform substitutions on the array element name, just as
* is done for quotes.
*/
pv.buffer = pv.next = copyStorage;
pv.end = copyStorage + NUM_CHARS - 1;
pv.expandProc = TclExpandParseValue;
pv.clientData = (ClientData) NULL;
if (TclParseQuotes(interp, string+1, ')', 0, &end, &pv)
!= TCL_OK) {
char msg[200];
int length;
length = string-name1;
if (length > 100) {
length = 100;
}
sprintf(msg, "\n (parsing index for array \"%.*s\")",
length, name1);
Tcl_AddErrorInfo(interp, msg);
result = NULL;
name2 = pv.buffer;
if (termPtr != 0) {
*termPtr = end;
}
goto done;
}
Tcl_ResetResult(interp);
string = end;
name2 = pv.buffer;
}
}
if (termPtr != 0) {
*termPtr = string;
}
c = *name1End;
*name1End = 0;
result = Tcl_GetVar2(interp, name1, name2, TCL_LEAVE_ERR_MSG);
*name1End = c;
done:
if ((name2 != NULL) && (pv.buffer != copyStorage)) {
ckfree(pv.buffer);
}
return result;
}
/*
*----------------------------------------------------------------------
*
* Tcl_CommandComplete --
*
* Given a partial or complete Tcl command, this procedure
* determines whether the command is complete in the sense
* of having matched braces and quotes and brackets.
*
* Results:
* 1 is returned if the command is complete, 0 otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
Tcl_CommandComplete(cmd)
char *cmd; /* Command to check. */
{
char *p;
if (*cmd == 0) {
return 1;
}
p = ScriptEnd(cmd, cmd+strlen(cmd), 0);
return (*p != 0);
}
/*
*----------------------------------------------------------------------
*
* TclObjCommandComplete --
*
* Given a partial or complete Tcl command in a Tcl object, this
* procedure determines whether the command is complete in the sense of
* having matched braces and quotes and brackets.
*
* Results:
* 1 is returned if the command is complete, 0 otherwise.
*
* Side effects:
* None.
*
*----------------------------------------------------------------------
*/
int
TclObjCommandComplete(cmdPtr)
Tcl_Obj *cmdPtr; /* Points to object holding command
* to check. */
{
char *cmd, *p;
int length;
cmd = Tcl_GetStringFromObj(cmdPtr, &length);
if (length == 0) {
return 1;
}
p = ScriptEnd(cmd, cmd+length, /*nested*/ 0);
return (*p != 0);
}