28     = unique_ptr<std::wstring_convert<std::codecvt_utf8<char32_t>,char32_t>>(
    29         new std::wstring_convert<std::codecvt_utf8<char32_t>,char32_t>()
    32 std::unordered_map<char32_t, expression::exptr> expression::symbols;
    41   return expression::empty;
    62   return expression::symbols.insert(pair<char32_t,exptr>(symbol, 
exptr(
new expression(symbol)))).first->second;
    67   char32_t u32Symbol(
converter->from_bytes(utf8Symbol)[0]);
    68   return expression::symbols.insert(pair<char32_t,exptr>(u32Symbol, 
exptr(
new expression(u32Symbol)))).first->second;
   136         return l->size() > r->size() ? r : l;
   182   size_t s(op != operation::empty);
   183   for (
exptr const& re : subExpressions) {
   208   if (!r.acceptingDfa) {
   209     r.acceptingDfa = unique_ptr<dfa const>(
new dfa(
dfa::builder(
gnfa(r).splitAllTransitions()).purge().merge()));
   211   return *acceptingDfa == *r.acceptingDfa;
   231       expression::symbols.
begin(),
   232       expression::symbols.
end(),
   233       [&](pair<char32_t,exptr> 
const& entry)->
bool{
return entry.second.get() == 
this;}
   236   if (it == expression::symbols.
end()) {
   237     throw std::logic_error(
"This RE does not seem to be a valid symbol expression.");
   251   if (symbol == U
'\0') {
   261     case operation::alternation : 
return subExpressions[0]->to_u32string().append(U
"+").append(subExpressions[1]->
to_u32string());
   262     case operation::concatenation : {
   264       if (subExpressions[0]->op >= operation::alternation) {
   265         concat.append(1, 
'(');
   267         concat.append(1, 
')');
   271       if (subExpressions[1]->op >= operation::alternation) {
   272         concat.append(1, 
'(');
   274         concat.append(1, 
')');
   280     case operation::kleene : {
   281       if (subExpressions[0]->op >= operation::concatenation) {
   282         return u32string(1, U
'(').append(subExpressions[0]->
to_u32string()).append(U
")*");
   284         return subExpressions[0]->to_u32string().append(1, 
'*');
   287     case operation::symbol : {
   289       return symbol == U
'\0' ? u32string(U
"ε") : u32string(1, symbol);
   291     case operation::empty : 
return u32string(U
"∅");
   292     default : 
return u32string();
   303   return subExpressions.cbegin();
   308   return subExpressions.cend();
   376     for (
unsigned char c(0); c < token::END; c++) {
   397     for (
unsigned char li(0); li < token::END; li++) {
   398     if (leftClosure[li]) {
   399       for (
unsigned char ri(0); ri < token::END; ri++) {
   400       if (rightClosure[ri]) {
   401         for (
unsigned char ci(0); ci < token::END; ci++) {
   403           for (
unsigned char successor(ci); successor != token::END; successor = 
inverseUnitGraph[successor]) {
   404             if (symbol == successor) {
   429     for (
size_t i(0); i < diag; i++) {
   431       for (
unsigned char fi(0); fi < first.size(); fi++) {
   434         for (
unsigned char si(0); si < second.size(); si++) {
   457     size_t numberOfTokens(re.length());
   459     table.reserve(numberOfTokens);
   461     for (char32_t symbol : re) {
   462       table.push_back(vector<tokens>(numberOfTokens-row, 
tokens()));
   463       if (symbol == 
lits.
L) { 
table[row][0].set(token::L); }
   464       else if (symbol == 
lits.
R) { 
table[row][0].set(token::R); }
   465       else if (symbol == 
lits.
P) { 
table[row][0].set(token::P); }
   466       else if (symbol == 
lits.
S) { 
table[row][0].set(token::S); }
   468         table[row][0].set(token::Σ);
   474     for (
size_t diag(1); diag < numberOfTokens; diag++) {
   475       for (
size_t row(0); row < numberOfTokens - diag; row++) {
   502       for (
size_t i = 0; i < diag; i++) {
   503         size_t leftDiag = diag-i-1;
   504         size_t rightDiag = i;
   505         size_t rightRow = diag+row-rightDiag;
   507           return make_pair(unique_ptr<tree>(
new tree(row, leftDiag, 
p)), unique_ptr<tree>(
new tree(rightRow, rightDiag, 
p)));
   510       return make_pair(unique_ptr<tree>(), unique_ptr<tree>());
   523           for (
unsigned char i(token::END); i > 0; i--) {
   524           if (
p->
table[row][diag][i-1]) {
   525             return static_cast<token>(i-1);
   538           return (*
children.second)(optimized, aggressive);
   544           return (*
children.second)(optimized, aggressive);
   546           return (*
children.first)(optimized, aggressive);
   568       throw std::bad_function_call();
   573     return tree(0, 
table.size()-1, 
this)(optimized, aggressive);
   578     []()->array<expression::parser::token,expression::parser::token::END>{
   579   array<token,token::END> graph;
   580   graph.fill(token::END);
   581   graph[token::Σ] = token::E;
   582   graph[token::E] = token::K;
   583   graph[token::K] = token::C;
   584   graph[token::C] = token::A;
   590     []()->array<array<expression::parser::tokens,expression::parser::token::END>,expression::parser::token::END>{
   591   array<tokens,token::END> noPredecessor;
   592   noPredecessor.fill(tokens());
   593   array<array<tokens,token::END>,token::END> rules;
   594   rules.fill(noPredecessor);
   595   rules[token::A][token::B].set(token::A);
   596   rules[token::P][token::C].set(token::B);
   597   rules[token::C][token::K].set(token::C);
   598   rules[token::E][token::S].set(token::K);
   599   rules[token::L][token::F].set(token::E);
   600   rules[token::A][token::R].set(token::F);
   614   parser stringParser(re, lits);
   617     return stringParser(optimized, aggressive);
   618   } 
catch (std::bad_function_call e) {
   619     throw std::invalid_argument(
"Invalid regular expression.");
   625   return stringParser(optimized, aggressive);
   635 expression::expression() : op(operation::empty) {}
   636 expression::expression(char32_t symbol) : op(operation::symbol) {}
   637 expression::expression(exptr 
const& l, exptr 
const& r, operation op) : subExpressions({l, r}), op(op) {}
   638 expression::expression(exptr 
const& b) : subExpressions({b}), op(operation::kleene) {}
 Number of elements in this enumeration, NOT AN ACTUAL TOKEN! 
 
Represents the table entries as binary trees. 
 
static array< array< tokens, token::END >, token::END > const inverseBinaryRules
Maps pairs of symbols to the symbols that derive them. 
 
static std::unique_ptr< std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > > const converter
Converts between UTF-8-encoded and UTF-32-encoded strings. 
 
char32_t const S
The Kleene star. 
 
static bool canDerive(token symbol, tokens const &left, tokens const &right)
Checks if a token could derive a pair of tokens from two other entries. 
 
A concatenation expression. 
 
static tokens getUClosure(tokens const &m)
Constructs the reflexive-transitive closure of the inverse unit relation for a given set of symbols...
 
pair< unique_ptr< tree >, unique_ptr< tree > > findNextPair(token symbol, size_t row, size_t diag, parser const *p)
Finds the child trees that can be derived from a given entry. 
 
token symbol
This tree's root symbol. 
 
bool operator!=(expression const &r) const
Checks whether this RE is semantically different from another one. 
 
Token literals as used in Introduction to Automata Theory, Languages, and Computation by Hopcroft...
 
Represents deterministic finite automata. 
 
char32_t const EMPTY
Neutral element of alternation and annihilating element of concatenation, a.k.a. empty set...
 
std::vector< exptr >::const_iterator begin() const
Returns an iterator pointing to this RE's first subexpression. 
 
static exptr const  & spawnEmptyString()
Gives an RE representing the empty string ε. 
 
nfa splitAllTransitions()
Splits all transitions until only ∅, ε, and symbol REs remain and builds the resulting NFA...
 
vector< vector< tokens > > table
The table of sets of symbols that derive a subsentence. 
 
exptr operator()(bool optimized, bool aggressive)
Gives the RE encoded in this tree. 
 
static exptr spawnFromString(std::u32string const &re, literals lits=literals(), bool optimized=false, bool aggressive=false)
Gives an RE encoded in a given string. 
 
static exptr spawnAlternation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the alternation of two given REs. 
 
Represents formal regular expressions. 
 
Beginning of a new subexpression. 
 
std::u32string to_u32string() const
Describes this RE in UTF-32-encoded human-readable form. 
 
bool badRegularExpression
Signifies that the RE used to initialize this object was invalid. 
 
char32_t extractSymbol() const
Reports this symbol expression's UTF-32-encoded symbol. 
 
literals lits
Stores interpretations for characters encountered in the parsed string. 
 
static exptr const  & spawnSymbol(char32_t symbol)
Gives an RE representing the given UTF-32-encoded symbol. 
 
pair< unique_ptr< tree >, unique_ptr< tree > > children
Trees with the symbols of the entry's derived pair as root. 
 
vector< char32_t > symbolMapping
Stores the actual symbols encountered in the RE while parsing. 
 
operation
The different purposes an RE may fulfill. 
 
Parses regular expressions. 
 
Contains the reg::nfa class definition. 
 
token
Tokens the grammar deals with. 
 
operation getOperation() const
Reports this RE's function. 
 
Contains the reg::expression class defintion. 
 
std::vector< exptr >::const_iterator end() const
Returns an iterator pointing behind this RE's last subexpression. 
 
Represents generalized nondeterministic finite automata. 
 
std::string to_string() const
Describes this RE in UTF-32-encoded human-readable form. 
 
size_t symbolMappingIndex
Index for when symbols have to be extracted from the mapping. 
 
Second part of an alternation expression. 
 
bool operator==(expression const &r) const
Checks whether this RE is semantically equivalent to another one. 
 
char32_t const L
The left parenthesis. 
 
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
 
Constructs DFAs step by step. 
 
parser const  * p
Points to the parser this tree belongs to. 
 
Beginning of an alternation expression. 
 
Contains the reg::gnfa class definition. 
 
char32_t const P
The alternation symbol. 
 
size_t size() const
Reports the size of this RE's tree representation. 
 
static void compileTableEntry(size_t row, size_t diag, vector< vector< tokens >> &table)
Fills a table entry. 
 
char32_t const EPSILON
Neutral element of concatenation, a.k.a. empty string. 
 
bitset< token::END > tokens
Tokens don't usually come alone. 
 
parser(u32string const &re, literals const &lits)
Initializes with a string to parse and literals to parse for. 
 
exptr operator()(bool optimized, bool aggressive)
Gives the RE resulting from parsing. 
 
tree(size_t row, size_t diag, parser const *p)
Initializes a tree with a given table entry as root. 
 
std::shared_ptr< expression const  > exptr
This is the type used to handle regular expressions. 
 
static exptr const  & spawnEmptySet()
Gives an RE representing the empty set ∅. 
 
char32_t const R
The right parenthesis. 
 
Contains the reg::dfa class definition. 
 
std::string extractUtf8Symbol() const
Reports this symbol expression's UTF-8-encoded symbol. 
 
static array< token, token::END > const inverseUnitGraph
Maps symbols that may be derived in-place to their predecessing symbols. 
 
static exptr spawnKleene(exptr const &b, bool optimized=true, bool aggressive=false)
Gives an RE representing the Kleene closure of a given RE. 
 
static exptr spawnConcatenation(exptr const &l, exptr const &r, bool optimized=true, bool aggressive=false)
Gives an RE representing the concatenation of two given REs. 
 
Second part of a new subexpression.