9 using std::make_unique;
15 #include <unordered_map> 57 for (char32_t symbol : this->alphabet) {
58 if (symbol == U
'\0') {
85 p(new pImpl(alphabet, transitions, labels, acceptingStates)) {}
89 return p->getEquivalentDfa(
this) == n.p->getEquivalentDfa(&n);
94 return p->getEquivalentDfa(
this) != n.p->getEquivalentDfa(&n);
106 if (si >= p->alphabet.size()) {
109 if (qi >= p->labels.size()) {
112 return p->transitions[qi][si];
166 if (qi >= p->labels.size()) {
209 for (char32_t symbol : word) {
250 if (p->accepting[0]) {
255 shortestWords.emplace(0, U
"");
256 while (shortestWords.size() > oldSize) {
257 oldSize = shortestWords.size();
258 for (
auto const& stateWord : shortestWords) {
259 for (
auto symbol : p->alphabet) {
262 for (
size_t q = 0; q < reached.size(); q++) {
if (reached[q]) {
263 if (p->accepting[q]) {
266 if (shortestWords.find(q) == shortestWords.end()) {
267 shortestWords.emplace(q, shWord);
288 if (qi >= p->labels.size()) {
290 string(
"There is no state with index ").append(std::to_string(qi))
293 if (p->epsClosures[qi].size() == 0) {
294 p->epsClosures[qi].resize(p->labels.size());
295 p->epsClosures[qi][qi] =
true;
301 for (
bool qqb : old) {
304 for (
bool pb : p->transitions[qqi][0]) {
306 if (!p->epsClosures[qi][pi]) {
308 p->epsClosures[qi][pi] =
true;
318 return p->epsClosures[qi];
343 for (
size_t q = 0; q < range; q++) {
367 return p->labels.at(qi);
378 for (
size_t q = 0; q < range; q++) {
380 label = label.append(1, label.length() == 0 ?
'{' :
',');
381 label = label.append(p->labels.at(q));
384 if (label.length() == 0) {
387 return label.append(1,
'}');
409 for (
size_t q = 0; q < range; q++) {
475 return p->utf8Alphabet;
501 if (qi >= p->labels.size()) {
503 string(
"There is no state with index ").append(std::to_string(qi))
506 return p->accepting[qi];
530 for (
size_t q = 0; q < range; q++) {
531 if (qs[q] && p->accepting[q]) {
545 if (p->accepting[qi] && qs.count(
getStates()[qi])) {
636 p.reset(
new pImpl(*(n.p)));
651 nfa::~nfa() =
default;
690 for (
size_t qi = 0; qi <
nfa.p->labels.size(); ++qi) {
692 for (char32_t symbol : p->alphabet) {
715 transitions[initial];
717 for (char32_t symbol : alphabet) {
724 p = make_unique<pImpl>(initial, acceptingStates, alphabet, transitions);
737 p.reset(
new pImpl(*(b.p)));
758 p->alphabet.insert(symbol);
764 return addSymbol(
converter.from_bytes(utf8Symbol)[0]);
774 if (p->transitions.empty()) {
777 p->transitions[state];
779 p->acceptingStates.insert(state);
781 p->acceptingStates.erase(state);
793 p->transitions[state];
805 if (p->transitions.empty()) {
809 p->transitions[from][symbol].insert(to);
816 return addTransition(from, to,
converter.from_bytes(utf8Symbol)[0]);
821 if (!p->transitions.empty()) {
823 stateNames.reserve(p->transitions.size());
824 stateNames.push_back(p->initial);
825 for (
auto const& fromTo : p->transitions) {
826 if (fromTo.first != p->initial) {
827 stateNames.push_back(fromTo.first);
832 for (
size_t q = 0; q < stateNames.size(); q++) {
833 auto const& vias = p->transitions[stateNames[q]];
835 for (
auto const& viaTo : vias) {
837 for (
size_t p = 0; p < stateNames.size(); p++) {
838 if (viaTo.second.find(stateNames[p]) != viaTo.second.cend()) {
839 newTos.insert(
string(prefix).append(std::to_string(p)));
842 newVias.insert(make_pair(viaTo.first, newTos));
844 newTr.insert(make_pair(
string(prefix).append(std::to_string(q)), newVias));
845 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
846 newAcc.insert(
string(prefix).append(std::to_string(q)));
849 p->initial =
string(
string(prefix).append(
"0"));
850 p->transitions = newTr;
851 p->acceptingStates = newAcc;
865 string newInitial(
"q0");
866 if (!p->transitions.empty()) {
868 for (
auto const& fromVia : p->transitions) {
870 for (
auto const& viaTo : fromVia.second) {
872 for (
auto const& to : viaTo.second) {
873 tempTo.insert(
string(to.length() + 1,
'1').replace(1, string::npos, to));
875 tempVia.insert(make_pair(viaTo.first, tempTo));
877 tempTr.insert(make_pair(
string(fromVia.first.length() + 1,
'1').replace(1, string::npos, fromVia.first), tempVia));
879 p->transitions = tempTr;
881 for (
auto const& acc : p->acceptingStates) {
882 tempAcc.insert(
string(acc.length() + 1,
'1').replace(1, string::npos, acc));
884 p->acceptingStates = tempAcc;
885 p->initial =
string(p->initial.length() + 1,
'1').replace(1, string::npos, p->initial);
886 addTransition(newInitial, p->initial, U
'\0');
888 makeInitial(newInitial);
890 p->alphabet.insert(oAlphabet.cbegin(), oAlphabet.cend());
893 string newQLabel =
string(qLabel.length() + 1,
'2').replace(1, string::npos, qLabel);
895 for (char32_t symbol : oAlphabet) {
900 tempTo.insert(
string(pLabel.length() + 1,
'2').replace(1, string::npos, pLabel));
902 tempVia.insert(make_pair(symbol, tempTo));
904 p->transitions.insert(make_pair(newQLabel, tempVia));
906 p->acceptingStates.insert(newQLabel);
909 addTransition(newInitial, newQLabel, U
'\0');
923 if (!p->transitions.empty()) {
925 stateNames.reserve(p->transitions.size());
926 stateNames.push_back(p->initial);
927 for (
auto const& fromTo : p->transitions) {
928 if (fromTo.first != p->initial) {
929 stateNames.push_back(fromTo.first);
933 size_t commonSymbols = 0;
934 for (char32_t symbol : p->alphabet) {
935 if (
index_of(oAlph, symbol) < oAlph.size()) {
938 for (
auto & fromVia : p->transitions) {
939 fromVia.second.erase(symbol);
943 p->alphabet.insert(oAlph.begin(), oAlph.end());
947 for (
size_t q = 0; q < stateNames.size(); q++) {
949 string potentialName = stateNames[q] + other.
getLabelOf(qq);
950 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
951 potentialName.append(
"_");
953 auto nameIt = newTr.emplace(potentialName, commonSymbols);
954 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
955 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.
isAccepting(qq)) {
956 newAcc.insert(potentialName);
959 p->transitions[stateNames[q]][U
'\0'].insert(stateNames[q]);
962 for (
size_t q = 0; q < stateNames.size(); q++) {
963 auto const& qLabel = stateNames[q];
964 auto const& viaTos = p->transitions[qLabel];
965 for (
auto const& viaTo : viaTos) {
968 for (
auto const& to : viaTo.second) {
969 size_t p =
index_of(stateNames, to);
970 for (
size_t pp = 0; pp < reached.size(); pp++) {
971 if (reached[pp] || (pp == qq && viaTo.first == U
'\0')) {
973 newTr[*(pairToName[q][qq])][viaTo.first].insert(*(pairToName[p][pp]));
980 for (
auto & fromVia : newTr) {
981 auto const& from = fromVia.first;
982 auto to = fromVia.second.find(U
'\0');
983 to->second.erase(from);
984 if (to->second.empty()) {
985 fromVia.second.erase(to);
988 p->transitions = newTr;
989 p->acceptingStates = newAcc;
990 p->initial = *(pairToName[0][0]);
1000 p->transitions[p->initial];
1001 p->alphabet.insert(U
'\0');
1003 std::sort(alphabetV.begin(), alphabetV.end());
1005 labelV[0] = p->initial;
1007 acceptingV[0] = (p->acceptingStates.find(p->initial) != p->acceptingStates.end());
1009 for (
auto entry : p->transitions) {
1010 if (entry.first == p->initial) {
1013 labelV[i] = entry.first;
1015 p->acceptingStates.find(entry.first) != p->acceptingStates.end()
1019 p->transitions.size(),
1021 p->alphabet.size()+1,
1027 for (
auto const& q : labelV) {
1030 for (char32_t symbol : alphabetV) {
1031 auto to = fromQ.find(symbol);
1034 for (
auto const& p : labelV) {
1035 transitionV[qi][si][i++] = (viaSymbol.count(p) != 0);
1041 return {alphabetV, transitionV, labelV, acceptingV};
1044 nfa::builder::~builder() =
default;
Ntransitionmap transitions
Transition function (state × symbol → set of states) of the prospective NFA.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
nfa build()
Builds the NFA, as defined by previous operations.
static nfa::builder complement(nfa const &n)
Creates a builder for an NFA accepting the complement of the language of an NFA.
std::string getShortestUtf8Word() const
Same as above for a UTF-8-encoded word.
size_t getNumberOfSymbols() const
Counts this NFA's alphabet symbols.
Represents nondeterministic finite automata with ε-moves.
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Represents deterministic finite automata.
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this NFA's set of processable symbols as UTF-8-encoded strings.
vector< vector< valarray< bool > > > transitions
Stores the transition function as a table viz state index × symbol index → list of bools with true a...
Constructs NFAs step by step.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
bool operator==(nfa const &n) const
Tests whether this NFA accepts exactly the same language as another one.
pImpl()
Constructs private implementation object for an NFA accepting the empty language ∅.
vector< string > labels
Stores the names of states.
static nfa::builder intersect(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the intersection of the languages of two NFAs.
builder & unite(nfa const &other)
Makes the prospective NFA also accept every word of another NFA's language.
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
nfa & operator=(nfa const &n)
Copy-assigns this NFA by copying another one's private implementation object.
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
string initial
Name of the prospective NFA's initial state.
std::valarray< bool > encodeSet(state_set const &qs) const
Translates a set of states from set to bool representation.
bool operator!=(nfa const &n) const
Tests whether this NFA doesn't accept the same language as another one.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective NFA's alphabet.
state_set getStatesSet() const
Fetches this NFA's set of states as a set of (references to) strings.
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective NFA.
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA's state names to consecutive numbers, prefixed with a given string...
Contains the reg::nfa class definition.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Contains the reg::expression class defintion.
vector< char32_t > alphabet
Represents the set of processable symbols.
state_set decodeSet(std::valarray< bool > const &qs) const
Translates a set of states from bool to set representation.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
size_t index_of(vector< T > const &vec, T const &element)
Basically Java's List interface's indexOf, but as a non-member function and returning the container's...
std::u32string getShortestWord() const
Searches the shortest UTF-32-encoded word accepted by this NFA.
static nfa::builder unite(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the union of the languages of two NFAs.
builder & complement()
Inverts the prospective DFA's language with respect to all possible strings over its alphabet...
Private implementation details of NFAs.
std::unordered_set< std::reference_wrapper< std::string const >, hash_reference_string_const, equal_to_reference_string_const > state_set
Nicer name for sets of names of states. Should store references to names in getStates.
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective NFA.
Constructs DFAs step by step.
vector< valarray< bool > > epsClosures
Cache for every state's ε-closures.
size_t getNumberOfStates() const
Counts this DFA's states.
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective NFA.
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
builder & intersect(nfa const &other)
Makes the prospective NFA accept only words accepted also by another NFA.
Private implementation details of NFA builders.
std::shared_ptr< dfa const > equivalent
Stores a minimal DFA accepting the same language as this object's NFA.
size_t getNumberOfStates() const
Counts this NFA's states.
static nfa::builder subtract(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the set difference of the languages of two NFAs...
pImpl(string &initial, unordered_set< string > &acceptingStates, unordered_set< char32_t > &alphabet, Ntransitionmap &transitions)
Constructs private implementation object with provided members.
std::valarray< bool > getStatesBools() const
Fetches this NFA's set of states encoded as an array of bools.
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
pImpl(vector< char32_t > &alphabet, vector< vector< valarray< bool >>> &transitions, vector< string > &labels, valarray< bool > &acceptingStates)
Constructs private implementation object with provided members and empty ε-closure cache...
dfa const & getEquivalentDfa(nfa const *owner)
Returns the equivalent DFA safely, constructing it if it wasn't already.
bool hasAccepting(std::valarray< bool > const &qs) const
Tests whether a set of states contains an accept state within this NFA.
Where this library lives.
nfa()
Constructs an NFA accepting the empty language ∅.
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
builder & operator=(builder const &b)
Copy-assigns a builder by copying another one's private implementation object.
std::string const & getInitialState() const
Names this NFA's initial state.
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
unordered_set< string > acceptingStates
Set of names of the prospective NFA's accept states.
builder & addTransition(std::string const &from, std::string const &to, char32_t symbol)
Adds a transition for the prospective NFA.
std::valarray< bool > const & delta(size_t qi, size_t si) const
Computes this NFA's transition function for a state index and a symbol index.
dfa build()
Builds the DFA, as defined by previous operations, including completion.
std::string const & getLabelOf(size_t qi) const
Puts a name to an index.
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
std::vector< std::string > const & getStates() const
Fetches this NFA's set of states in order of internal representation.
pImpl()=default
Constructs empty private implementation object.
Contains the reg::dfa class definition.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
builder()
Constructs a blank builder object.