freebsd-dev/usr.bin/dtc/input_buffer.cc
David Chisnall c64a3eaf92 Improvements to BSD-licensed DTC.
- Added an expression parser so that expressions from headers are now working
- Fixed missing null terminators on cross references
- Disabled exceptions / RTTI in the build for smaller binaries
- Changed phandle order generation to be identical to GPL'd dtc
2015-12-29 16:29:42 +00:00

832 lines
16 KiB
C++

/*-
* Copyright (c) 2013 David Chisnall
* All rights reserved.
*
* This software was developed by SRI International and the University of
* Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
* ("CTSRD"), as part of the DARPA CRASH research programme.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "input_buffer.hh"
#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <functional>
#ifndef NDEBUG
#include <iostream>
#endif
#include <sys/stat.h>
#include <sys/mman.h>
#include <assert.h>
#ifndef MAP_PREFAULT_READ
#define MAP_PREFAULT_READ 0
#endif
namespace dtc
{
void
input_buffer::skip_spaces()
{
if (cursor >= size) { return; }
if (cursor < 0) { return; }
char c = buffer[cursor];
while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
|| (c == '\v') || (c == '\r'))
{
cursor++;
if (cursor > size)
{
c = '\0';
}
else
{
c = buffer[cursor];
}
}
}
input_buffer
input_buffer::buffer_from_offset(int offset, int s)
{
if (s == 0)
{
s = size - offset;
}
if (offset > size)
{
return input_buffer();
}
if (s > (size-offset))
{
return input_buffer();
}
return input_buffer(&buffer[offset], s);
}
bool
input_buffer::consume(const char *str)
{
int len = strlen(str);
if (len > size - cursor)
{
return false;
}
else
{
for (int i=0 ; i<len ; ++i)
{
if (str[i] != buffer[cursor + i])
{
return false;
}
}
cursor += len;
return true;
}
return false;
}
bool
input_buffer::consume_integer(unsigned long long &outInt)
{
// The first character must be a digit. Hex and octal strings
// are prefixed by 0 and 0x, respectively.
if (!isdigit((*this)[0]))
{
return false;
}
char *end=0;
outInt = strtoull(&buffer[cursor], &end, 0);
if (end == &buffer[cursor])
{
return false;
}
cursor = end - buffer;
return true;
}
namespace {
/**
* Convenience typedef for the type that we use for all values.
*/
typedef unsigned long long valty;
/**
* Expression tree currently being parsed.
*/
struct expression
{
/**
* Evaluate this node, taking into account operator precedence.
*/
virtual valty operator()() = 0;
/**
* Returns the precedence of this node. Lower values indicate higher
* precedence.
*/
virtual int precedence() = 0;
virtual ~expression() {}
#ifndef NDEBUG
/**
* Dumps this expression to `std::cerr`, appending a newline if `nl` is
* `true`.
*/
void dump(bool nl=false)
{
if (this == nullptr)
{
std::cerr << "{nullptr}\n";
return;
}
dump_impl();
if (nl)
{
std::cerr << '\n';
}
}
private:
/**
* Method that sublcasses override to implement the behaviour of `dump()`.
*/
virtual void dump_impl() = 0;
#endif
};
/**
* Expression wrapping a single integer. Leaf nodes in the expression tree.
*/
class terminal_expr : public expression
{
/**
* The value that this wraps.
*/
valty val;
/**
* Evaluate. Trivially returns the value that this class wraps.
*/
valty operator()() override
{
return val;
}
int precedence() override
{
return 0;
}
public:
/**
* Constructor.
*/
terminal_expr(valty v) : val(v) {}
#ifndef NDEBUG
void dump_impl() override { std::cerr << val; }
#endif
};
/**
* Parenthetical expression. Exists to make the contents opaque.
*/
struct paren_expression : public expression
{
/**
* The expression within the parentheses.
*/
expression_ptr subexpr;
/**
* Constructor. Takes the child expression as the only argument.
*/
paren_expression(expression_ptr p) : subexpr(std::move(p)) {}
int precedence() override
{
return 0;
}
/**
* Evaluate - just forwards to the underlying expression.
*/
valty operator()() override
{
return (*subexpr)();
}
#ifndef NDEBUG
void dump_impl() override
{
std::cerr << " (";
subexpr->dump();
std::cerr << ") ";
}
#endif
};
/**
* Template class for unary operators. The `OpChar` template parameter is
* solely for debugging and makes it easy to print the expression. The `Op`
* template parameter is a function object that implements the operator that
* this class provides. Most of these are provided by the `<functional>`
* header.
*/
template<char OpChar, class Op>
class unary_operator : public expression
{
/**
* The subexpression for this unary operator.
*/
expression_ptr subexpr;
valty operator()() override
{
Op op;
return op((*subexpr)());
}
/**
* All unary operators have the same precedence. They are all evaluated
* before binary expressions, but after parentheses.
*/
int precedence() override
{
return 3;
}
public:
unary_operator(expression_ptr p) : subexpr(std::move(p)) {}
#ifndef NDEBUG
void dump_impl() override
{
std::cerr << OpChar;
subexpr->dump();
}
#endif
};
/**
* Abstract base class for binary operators. Allows the tree to be modified
* without knowing what the operations actually are.
*/
struct binary_operator_base : public expression
{
/**
* The left side of the expression.
*/
expression_ptr lhs;
/**
* The right side of the expression.
*/
expression_ptr rhs;
/**
* Insert a node somewhere down the path of left children, until it would
* be preempting something that should execute first.
*/
void insert_left(binary_operator_base *new_left)
{
if (lhs->precedence() < new_left->precedence())
{
new_left->rhs = std::move(lhs);
lhs.reset(new_left);
}
else
{
static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
}
}
};
/**
* Template class for binary operators. The precedence and the operation are
* provided as template parameters.
*/
template<int Precedence, class Op>
struct binary_operator : public binary_operator_base
{
valty operator()() override
{
Op op;
return op((*lhs)(), (*rhs)());
}
int precedence() override
{
return Precedence;
}
#ifdef NDEBUG
/**
* Constructor. Takes the name of the operator as an argument, for
* debugging. Only stores it in debug mode.
*/
binary_operator(const char *) {}
#else
const char *opName;
binary_operator(const char *o) : opName(o) {}
void dump_impl() override
{
lhs->dump();
std::cerr << opName;
rhs->dump();
}
#endif
};
/**
* Ternary conditional operators (`cond ? true : false`) are a special case -
* there are no other ternary operators.
*/
class ternary_conditional_operator : public expression
{
/**
* The condition for the clause.
*/
expression_ptr cond;
/**
* The expression that this evaluates to if the condition is true.
*/
expression_ptr lhs;
/**
* The expression that this evaluates to if the condition is false.
*/
expression_ptr rhs;
valty operator()() override
{
return (*cond)() ? (*lhs)() : (*rhs)();
}
int precedence() override
{
// The actual precedence of a ternary conditional operator is 15, but
// its associativity is the opposite way around to the other operators,
// so we fudge it slightly.
return 3;
}
#ifndef NDEBUG
void dump_impl() override
{
cond->dump();
std::cerr << " ? ";
lhs->dump();
std::cerr << " : ";
rhs->dump();
}
#endif
public:
ternary_conditional_operator(expression_ptr c,
expression_ptr l,
expression_ptr r) :
cond(std::move(c)), lhs(std::move(l)), rhs(std::move(r)) {}
};
template<typename T>
struct lshift
{
constexpr T operator()(const T &lhs, const T &rhs) const
{
return lhs << rhs;
}
};
template<typename T>
struct rshift
{
constexpr T operator()(const T &lhs, const T &rhs) const
{
return lhs >> rhs;
}
};
template<typename T>
struct unary_plus
{
constexpr T operator()(const T &val) const
{
return +val;
}
};
// TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
template<typename T>
struct bit_not
{
constexpr T operator()(const T &val) const
{
return ~val;
}
};
} // anonymous namespace
expression_ptr input_buffer::parse_binary_expression(expression_ptr lhs)
{
next_token();
binary_operator_base *expr = nullptr;
char op = ((*this)[0]);
switch (op)
{
default:
return lhs;
case '+':
expr = new binary_operator<6, std::plus<valty>>("+");
break;
case '-':
expr = new binary_operator<6, std::minus<valty>>("-");
break;
case '%':
expr = new binary_operator<5, std::modulus<valty>>("%");
break;
case '*':
expr = new binary_operator<5, std::multiplies<valty>>("*");
break;
case '/':
expr = new binary_operator<5, std::divides<valty>>("/");
break;
case '<':
cursor++;
switch ((*this)[0])
{
default:
parse_error("Invalid operator");
return nullptr;
case ' ':
case '(':
case '0'...'9':
cursor--;
expr = new binary_operator<8, std::less<valty>>("<");
break;
case '=':
expr = new binary_operator<8, std::less_equal<valty>>("<=");
break;
case '<':
expr = new binary_operator<7, lshift<valty>>("<<");
break;
}
break;
case '>':
cursor++;
switch ((*this)[0])
{
default:
parse_error("Invalid operator");
return nullptr;
case '(':
case ' ':
case '0'...'9':
cursor--;
expr = new binary_operator<8, std::greater<valty>>(">");
break;
case '=':
expr = new binary_operator<8, std::greater_equal<valty>>(">=");
break;
case '>':
expr = new binary_operator<7, rshift<valty>>(">>");
break;
return lhs;
}
break;
case '=':
if ((*this)[1] != '=')
{
parse_error("Invalid operator");
return nullptr;
}
consume('=');
expr = new binary_operator<9, std::equal_to<valty>>("==");
break;
case '!':
if ((*this)[1] != '=')
{
parse_error("Invalid operator");
return nullptr;
}
cursor++;
expr = new binary_operator<9, std::not_equal_to<valty>>("!=");
break;
case '&':
if ((*this)[1] == '&')
{
expr = new binary_operator<13, std::logical_and<valty>>("&&");
}
else
{
expr = new binary_operator<10, std::bit_and<valty>>("&");
}
break;
case '|':
if ((*this)[1] == '|')
{
expr = new binary_operator<12, std::logical_or<valty>>("||");
}
else
{
expr = new binary_operator<14, std::bit_or<valty>>("|");
}
break;
case '?':
{
consume('?');
expression_ptr true_case = parse_expression();
next_token();
if (!true_case || !consume(':'))
{
parse_error("Expected : in ternary conditional operator");
return nullptr;
}
expression_ptr false_case = parse_expression();
if (!false_case)
{
parse_error("Expected false condition for ternary operator");
return nullptr;
}
return expression_ptr(new ternary_conditional_operator(std::move(lhs),
std::move(true_case), std::move(false_case)));
}
}
cursor++;
next_token();
expression_ptr e(expr);
expression_ptr rhs(parse_expression());
if (!rhs)
{
return nullptr;
}
expr->lhs = std::move(lhs);
if (rhs->precedence() < expr->precedence())
{
expr->rhs = std::move(rhs);
}
else
{
// If we're a normal left-to-right expression, then we need to insert
// this as the far-left child node of the rhs expression
binary_operator_base *rhs_op =
static_cast<binary_operator_base*>(rhs.get());
rhs_op->insert_left(expr);
e.release();
return rhs;
}
return e;
}
expression_ptr input_buffer::parse_expression(bool stopAtParen)
{
next_token();
unsigned long long leftVal;
expression_ptr lhs;
switch ((*this)[0])
{
case '0'...'9':
if (!consume_integer(leftVal))
{
return nullptr;
}
lhs.reset(new terminal_expr(leftVal));
break;
case '(':
{
consume('(');
expression_ptr &&subexpr = parse_expression();
if (!subexpr)
{
return nullptr;
}
lhs.reset(new paren_expression(std::move(subexpr)));
if (!consume(')'))
{
return nullptr;
}
if (stopAtParen)
{
return lhs;
}
break;
}
case '+':
{
consume('+');
expression_ptr &&subexpr = parse_expression();
if (!subexpr)
{
return nullptr;
}
lhs.reset(new unary_operator<'+', unary_plus<valty>>(std::move(subexpr)));
break;
}
case '-':
{
consume('-');
expression_ptr &&subexpr = parse_expression();
if (!subexpr)
{
return nullptr;
}
lhs.reset(new unary_operator<'-', std::negate<valty>>(std::move(subexpr)));
break;
}
case '!':
{
consume('!');
expression_ptr &&subexpr = parse_expression();
if (!subexpr)
{
return nullptr;
}
lhs.reset(new unary_operator<'!', std::logical_not<valty>>(std::move(subexpr)));
break;
}
case '~':
{
consume('~');
expression_ptr &&subexpr = parse_expression();
if (!subexpr)
{
return nullptr;
}
lhs.reset(new unary_operator<'~', bit_not<valty>>(std::move(subexpr)));
break;
}
}
if (!lhs)
{
return nullptr;
}
return parse_binary_expression(std::move(lhs));
}
bool
input_buffer::consume_integer_expression(unsigned long long &outInt)
{
switch ((*this)[0])
{
case '(':
{
expression_ptr e(parse_expression(true));
if (!e)
{
return false;
}
outInt = (*e)();
return true;
}
case '0'...'9':
return consume_integer(outInt);
default:
return false;
}
}
bool
input_buffer::consume_hex_byte(uint8_t &outByte)
{
if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
{
return false;
}
outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
cursor += 2;
return true;
}
input_buffer&
input_buffer::next_token()
{
int start;
do {
start = cursor;
skip_spaces();
// Parse /* comments
if ((*this)[0] == '/' && (*this)[1] == '*')
{
// eat the start of the comment
++(*this);
++(*this);
do {
// Find the ending * of */
while ((**this != '\0') && (**this != '*'))
{
++(*this);
}
// Eat the *
++(*this);
} while ((**this != '\0') && (**this != '/'));
// Eat the /
++(*this);
}
// Parse // comments
if (((*this)[0] == '/' && (*this)[1] == '/'))
{
// eat the start of the comment
++(*this);
++(*this);
// Find the ending of the line
while (**this != '\n')
{
++(*this);
}
// Eat the \n
++(*this);
}
} while (start != cursor);
return *this;
}
void
input_buffer::parse_error(const char *msg)
{
int line_count = 1;
int line_start = 0;
int line_end = cursor;
for (int i=cursor ; i>0 ; --i)
{
if (buffer[i] == '\n')
{
line_count++;
if (line_start == 0)
{
line_start = i+1;
}
}
}
for (int i=cursor+1 ; i<size ; ++i)
{
if (buffer[i] == '\n')
{
line_end = i;
break;
}
}
fprintf(stderr, "Error on line %d: %s\n", line_count, msg);
fwrite(&buffer[line_start], line_end-line_start, 1, stderr);
putc('\n', stderr);
for (int i=0 ; i<(cursor-line_start) ; ++i)
{
char c = (buffer[i+line_start] == '\t') ? '\t' : ' ';
putc(c, stderr);
}
putc('^', stderr);
putc('\n', stderr);
}
#ifndef NDEBUG
void
input_buffer::dump()
{
fprintf(stderr, "Current cursor: %d\n", cursor);
fwrite(&buffer[cursor], size-cursor, 1, stderr);
}
#endif
mmap_input_buffer::mmap_input_buffer(int fd) : input_buffer(0, 0)
{
struct stat sb;
if (fstat(fd, &sb))
{
perror("Failed to stat file");
}
size = sb.st_size;
buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
MAP_PREFAULT_READ, fd, 0);
if (buffer == MAP_FAILED)
{
perror("Failed to mmap file");
exit(EXIT_FAILURE);
}
}
mmap_input_buffer::~mmap_input_buffer()
{
if (buffer != 0)
{
munmap((void*)buffer, size);
}
}
stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
{
int c;
while ((c = fgetc(stdin)) != EOF)
{
b.push_back(c);
}
buffer = b.data();
size = b.size();
}
} // namespace dtc