//===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class defines a generic trie structure. The trie structure
// is immutable after creation, but the payload contained within it is not.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_ADT_TRIE_H
|
|
#define LLVM_ADT_TRIE_H
|
|
|
|
#include "llvm/ADT/GraphTraits.h"
#include "llvm/Support/DOTGraphTraits.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>
|
|
|
|
namespace llvm {
|
|
|
|
// FIXME:
//   - Labels are usually small; it may be better to use SmallString.
//   - Should we use char* during construction?
//   - Should we templatize Empty with a traits-like interface?
|
|
|
|
template<class Payload>
|
|
class Trie {
|
|
friend class GraphTraits<Trie<Payload> >;
|
|
friend class DOTGraphTraits<Trie<Payload> >;
|
|
public:
|
|
class Node {
|
|
friend class Trie;
|
|
|
|
public:
|
|
typedef std::vector<Node*> NodeVectorType;
|
|
typedef typename NodeVectorType::iterator iterator;
|
|
typedef typename NodeVectorType::const_iterator const_iterator;
|
|
|
|
private:
|
|
enum QueryResult {
|
|
Same = -3,
|
|
StringIsPrefix = -2,
|
|
LabelIsPrefix = -1,
|
|
DontMatch = 0,
|
|
HaveCommonPart
|
|
};
|
|
|
|
struct NodeCmp {
|
|
bool operator() (Node* N1, Node* N2) {
|
|
return (N1->Label[0] < N2->Label[0]);
|
|
}
|
|
bool operator() (Node* N, char Id) {
|
|
return (N->Label[0] < Id);
|
|
}
|
|
};
|
|
|
|
std::string Label;
|
|
Payload Data;
|
|
NodeVectorType Children;
|
|
|
|
// Do not implement
|
|
Node(const Node&);
|
|
Node& operator=(const Node&);
|
|
|
|
inline void addEdge(Node* N) {
|
|
if (Children.empty())
|
|
Children.push_back(N);
|
|
else {
|
|
iterator I = std::lower_bound(Children.begin(), Children.end(),
|
|
N, NodeCmp());
|
|
// FIXME: no dups are allowed
|
|
Children.insert(I, N);
|
|
}
|
|
}
|
|
|
|
inline void setEdge(Node* N) {
|
|
char Id = N->Label[0];
|
|
iterator I = std::lower_bound(Children.begin(), Children.end(),
|
|
Id, NodeCmp());
|
|
assert(I != Children.end() && "Node does not exists!");
|
|
*I = N;
|
|
}
|
|
|
|
QueryResult query(const std::string& s) const {
|
|
unsigned i, l;
|
|
unsigned l1 = s.length();
|
|
unsigned l2 = Label.length();
|
|
|
|
// Find the length of common part
|
|
l = std::min(l1, l2);
|
|
i = 0;
|
|
while ((i < l) && (s[i] == Label[i]))
|
|
++i;
|
|
|
|
if (i == l) { // One is prefix of another, find who is who
|
|
if (l1 == l2)
|
|
return Same;
|
|
else if (i == l1)
|
|
return StringIsPrefix;
|
|
else
|
|
return LabelIsPrefix;
|
|
} else // s and Label have common (possible empty) part, return its length
|
|
return (QueryResult)i;
|
|
}
|
|
|
|
public:
|
|
inline explicit Node(const Payload& data, const std::string& label = ""):
|
|
Label(label), Data(data) { }
|
|
|
|
inline const Payload& data() const { return Data; }
|
|
inline void setData(const Payload& data) { Data = data; }
|
|
|
|
inline const std::string& label() const { return Label; }
|
|
|
|
#if 0
|
|
inline void dump() {
|
|
llvm::cerr << "Node: " << this << "\n"
|
|
<< "Label: " << Label << "\n"
|
|
<< "Children:\n";
|
|
|
|
for (iterator I = Children.begin(), E = Children.end(); I != E; ++I)
|
|
llvm::cerr << (*I)->Label << "\n";
|
|
}
|
|
#endif
|
|
|
|
inline Node* getEdge(char Id) {
|
|
Node* fNode = NULL;
|
|
iterator I = std::lower_bound(Children.begin(), Children.end(),
|
|
Id, NodeCmp());
|
|
if (I != Children.end() && (*I)->Label[0] == Id)
|
|
fNode = *I;
|
|
|
|
return fNode;
|
|
}
|
|
|
|
inline iterator begin() { return Children.begin(); }
|
|
inline const_iterator begin() const { return Children.begin(); }
|
|
inline iterator end () { return Children.end(); }
|
|
inline const_iterator end () const { return Children.end(); }
|
|
|
|
inline size_t size () const { return Children.size(); }
|
|
inline bool empty() const { return Children.empty(); }
|
|
inline const Node* &front() const { return Children.front(); }
|
|
inline Node* &front() { return Children.front(); }
|
|
inline const Node* &back() const { return Children.back(); }
|
|
inline Node* &back() { return Children.back(); }
|
|
|
|
};
|
|
|
|
private:
|
|
std::vector<Node*> Nodes;
|
|
Payload Empty;
|
|
|
|
inline Node* addNode(const Payload& data, const std::string label = "") {
|
|
Node* N = new Node(data, label);
|
|
Nodes.push_back(N);
|
|
return N;
|
|
}
|
|
|
|
inline Node* splitEdge(Node* N, char Id, size_t index) {
|
|
Node* eNode = N->getEdge(Id);
|
|
assert(eNode && "Node doesn't exist");
|
|
|
|
const std::string &l = eNode->Label;
|
|
assert(index > 0 && index < l.length() && "Trying to split too far!");
|
|
std::string l1 = l.substr(0, index);
|
|
std::string l2 = l.substr(index);
|
|
|
|
Node* nNode = addNode(Empty, l1);
|
|
N->setEdge(nNode);
|
|
|
|
eNode->Label = l2;
|
|
nNode->addEdge(eNode);
|
|
|
|
return nNode;
|
|
}
|
|
|
|
// Do not implement
|
|
Trie(const Trie&);
|
|
Trie& operator=(const Trie&);
|
|
|
|
public:
|
|
inline explicit Trie(const Payload& empty):Empty(empty) {
|
|
addNode(Empty);
|
|
}
|
|
inline ~Trie() {
|
|
for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
|
|
delete Nodes[i];
|
|
}
|
|
|
|
inline Node* getRoot() const { return Nodes[0]; }
|
|
|
|
bool addString(const std::string& s, const Payload& data);
|
|
const Payload& lookup(const std::string& s) const;
|
|
|
|
};
|
|
|
|
// Define this out-of-line to dissuade the C++ compiler from inlining it.
|
|
template<class Payload>
|
|
bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
|
|
Node* cNode = getRoot();
|
|
Node* tNode = NULL;
|
|
std::string s1(s);
|
|
|
|
while (tNode == NULL) {
|
|
char Id = s1[0];
|
|
if (Node* nNode = cNode->getEdge(Id)) {
|
|
typename Node::QueryResult r = nNode->query(s1);
|
|
|
|
switch (r) {
|
|
case Node::Same:
|
|
case Node::StringIsPrefix:
|
|
// Currently we don't allow to have two strings in the trie one
|
|
// being a prefix of another. This should be fixed.
|
|
assert(0 && "FIXME!");
|
|
return false;
|
|
case Node::DontMatch:
|
|
assert(0 && "Impossible!");
|
|
return false;
|
|
case Node::LabelIsPrefix:
|
|
s1 = s1.substr(nNode->label().length());
|
|
cNode = nNode;
|
|
break;
|
|
default:
|
|
nNode = splitEdge(cNode, Id, r);
|
|
tNode = addNode(data, s1.substr(r));
|
|
nNode->addEdge(tNode);
|
|
}
|
|
} else {
|
|
tNode = addNode(data, s1);
|
|
cNode->addEdge(tNode);
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
template<class Payload>
|
|
const Payload& Trie<Payload>::lookup(const std::string& s) const {
|
|
Node* cNode = getRoot();
|
|
Node* tNode = NULL;
|
|
std::string s1(s);
|
|
|
|
while (tNode == NULL) {
|
|
char Id = s1[0];
|
|
if (Node* nNode = cNode->getEdge(Id)) {
|
|
typename Node::QueryResult r = nNode->query(s1);
|
|
|
|
switch (r) {
|
|
case Node::Same:
|
|
tNode = nNode;
|
|
break;
|
|
case Node::StringIsPrefix:
|
|
return Empty;
|
|
case Node::DontMatch:
|
|
assert(0 && "Impossible!");
|
|
return Empty;
|
|
case Node::LabelIsPrefix:
|
|
s1 = s1.substr(nNode->label().length());
|
|
cNode = nNode;
|
|
break;
|
|
default:
|
|
return Empty;
|
|
}
|
|
} else
|
|
return Empty;
|
|
}
|
|
|
|
return tNode->data();
|
|
}
|
|
|
|
template<class Payload>
|
|
struct GraphTraits<Trie<Payload> > {
|
|
typedef Trie<Payload> TrieType;
|
|
typedef typename TrieType::Node NodeType;
|
|
typedef typename NodeType::iterator ChildIteratorType;
|
|
|
|
static inline NodeType *getEntryNode(const TrieType& T) {
|
|
return T.getRoot();
|
|
}
|
|
|
|
static inline ChildIteratorType child_begin(NodeType *N) {
|
|
return N->begin();
|
|
}
|
|
static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
|
|
|
|
typedef typename std::vector<NodeType*>::const_iterator nodes_iterator;
|
|
|
|
static inline nodes_iterator nodes_begin(const TrieType& G) {
|
|
return G.Nodes.begin();
|
|
}
|
|
static inline nodes_iterator nodes_end(const TrieType& G) {
|
|
return G.Nodes.end();
|
|
}
|
|
|
|
};
|
|
|
|
template<class Payload>
|
|
struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
|
|
typedef typename Trie<Payload>::Node NodeType;
|
|
typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter;
|
|
|
|
static std::string getGraphName(const Trie<Payload>& T) {
|
|
return "Trie";
|
|
}
|
|
|
|
static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) {
|
|
if (T.getRoot() == Node)
|
|
return "<Root>";
|
|
else
|
|
return Node->label();
|
|
}
|
|
|
|
static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) {
|
|
NodeType* N = *I;
|
|
return N->label().substr(0, 1);
|
|
}
|
|
|
|
static std::string getNodeAttributes(const NodeType* Node,
|
|
const Trie<Payload>& T) {
|
|
if (Node->data() != T.Empty)
|
|
return "color=blue";
|
|
|
|
return "";
|
|
}
|
|
|
|
};
|
|
|
|
} // end of llvm namespace
|
|
|
|
#endif // LLVM_ADT_TRIE_H
|