7 using std::make_unique;
12 #include <unordered_set> 15 #include <unordered_map> 18 #include <forward_list> 35 for (
bool v : value) {
46 return (lhs.size() == rhs.size() && (lhs == rhs).
min() ==
true);
73 for (char32_t symbol : this->alphabet) {
91 for (
size_t p(0); p < q; p++) {
93 distinguishable[q][p] = (qAcc != pAcc);
94 distinguishable[p][q] = (qAcc != pAcc);
101 for (
size_t p(0); p < q; p++) {
102 if (distinguishable[q][p]) {
105 for (
size_t s(0); s <
transitions[q].size(); s++) {
108 if (distinguishable[qS][pS]) {
110 distinguishable[q][p] =
true;
111 distinguishable[p][q] =
true;
119 for (
size_t i(0); i < distinguishable.size(); i++) {
120 distinguishable[i] ^= allTrue;
122 return distinguishable;
134 p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
146 p.reset(
new pImpl(*(d.p)));
161 dfa::~dfa() =
default;
163 #ifndef DOXYGEN_SHOULD_SKIP_THIS 164 dfa::operator
nfa const&()
const {
165 if (!p->equivalent) {
168 return *p->equivalent;
170 #endif // DOXYGEN_SHOULD_SKIP_THIS 179 size_t unitedAlphabetSize(p->alphabet.size());
181 if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
182 unitedAlphabet.push_front(symbol);
183 unitedAlphabetSize++;
191 for (char32_t symbol : unitedAlphabet) {
193 row[s] =
delta(q, symbol);
204 for (char32_t symbol : unitedAlphabet) {
214 for (
size_t s(0); s < unitedAlphabetSize; s++) {
225 return operator==(static_cast<dfa const&>(n));
240 if (si >= p->alphabet.size()) {
243 if (qi >= p->labels.size()) {
246 return p->transitions[qi][si];
266 size_t dfa::delta(
size_t qi,
string const& utf8Symbol)
const {
278 string const&
dfa::delta(
string const& q, char32_t symbol)
const {
287 string const&
dfa::delta(
string const& q,
string const& utf8Symbol)
const {
300 for (char32_t symbol : word) {
301 qi =
delta(qi, symbol);
324 string const&
dfa::deltaHat(
string const& q,
string const& utf8Word)
const {
372 return p->utf8Alphabet;
392 if (qi >= p->labels.size()) {
395 return p->accepting[qi];
420 if (p->accepting[0]) {
425 shortestWords.emplace(0, U
"");
426 while (shortestWords.size() > oldSize) {
427 oldSize = shortestWords.size();
428 for (
auto const& stateWord : shortestWords) {
429 for (
auto symbol : p->alphabet) {
430 size_t reached = d.
delta(stateWord.first, symbol);
431 u32string shWord = stateWord.second + symbol;
432 if (p->accepting[reached]) {
435 if (shortestWords.find(reached) == shortestWords.end()) {
436 shortestWords.emplace(reached, shWord);
534 auto via = from.find(symbol);
535 if (via != from.end() && (*via).second != q) {
558 bool trashUsed(
false);
559 string const& trashState = [&]{
560 for (
auto const& entry : this->transitions) {
570 if (from.second.find(symbol) == from.second.end()) {
571 from.second[symbol] = trashState;
572 trashUsed |= (from.first != trashState);
591 p = make_unique<pImpl>(initial, alphabet, labels, transitions);
607 alphabet.erase(alphabet.begin());
611 transitions[initial];
612 p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
613 while (stackSize != 0) {
617 for (char32_t symbol : p->alphabet) {
620 auto equalToNext = [&](
valarray<bool> const& ref)->
bool{
return (next == ref).min() ==
true;};
621 if (!equalToNext(current) &&
622 none_of(stack.begin(), stack.end(), equalToNext) &&
623 none_of(done.begin(), done.end(), equalToNext)) {
624 stack.push_front(next);
628 for (
size_t qi(0); qi < current.size(); qi++) {
634 done.insert(current);
648 p.reset(
new pImpl(*(b.p)));
663 dfa::builder::~builder() =
default;
666 dfa::builder::operator
dfa() {
676 p->alphabet.insert(symbol);
682 return addSymbol(
converter.from_bytes(utf8Symbol)[0]);
692 if (p->transitions.empty()) {
695 p->transitions[state];
697 p->acceptingStates.insert(state);
699 p->acceptingStates.erase(state);
711 p->transitions[state];
726 if (p->transitions.empty()) {
730 p->transitions[from][symbol] = to;
737 return defineTransition(from, to,
converter.from_bytes(utf8Symbol)[0]);
755 std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
757 orderedStates.reserve(p->transitions.size());
758 orderedStates.push_back(p->initial);
759 for (
auto const& entry : p->transitions) {
760 if (entry.first != p->initial) {
761 orderedStates.push_back(entry.first);
764 for (
size_t q(0); q < orderedStates.size(); q++) {
765 string const& qLabel(orderedStates[q]);
766 if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
769 for (
size_t s(0); s < sortedAlphabet.size(); s++) {
770 tTable[q].push_back(
index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
774 for (
size_t q(0); q < orderedStates.size(); q++) {
775 for (
size_t first(0); first < equivalences[q].size(); first++) {
776 if (equivalences[q][first] && q > first) {
777 string const& superfluous(orderedStates[q]);
778 string const& remaining(orderedStates[first]);
779 p->acceptingStates.erase(superfluous);
780 p->transitions.erase(superfluous);
781 for (
auto& from : p->transitions) {
782 for (
auto& via : from.second) {
783 if (via.second == superfluous) {
784 via.second = remaining;
805 while (newReachable.size() > 0) {
807 newReachable.clear();
808 for (
string const& reachableState : oldReachable) {
809 for (
auto const& reachedState : p->transitions[reachableState]) {
810 if (reachable.insert(reachedState.second).second) {
811 newReachable.insert(reachedState.second);
816 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
817 if (reachable.find(it->first) == reachable.end()) {
818 p->acceptingStates.erase(it->first);
819 it = p->transitions.erase(it);
826 while (newProducing.size() > 0) {
828 newProducing.clear();
829 for (
auto const& from : p->transitions) {
830 for (
auto const& via : from.second) {
831 if (producing.find(via.second) != producing.end()) {
832 if (producing.insert(from.first).second) {
833 newProducing.insert(from.first);
840 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
841 if (producing.find(it->first) == producing.end() && it->first != p->initial) {
842 p->acceptingStates.erase(it->first);
843 for (
auto& via : p->transitions) {
844 for (
auto itv = via.second.begin(); itv != via.second.end(); ) {
845 if (itv->second == it->first) {
846 itv = via.second.erase(itv);
852 it = p->transitions.erase(it);
862 return purge().
merge();
873 if (!p->transitions.empty()) {
875 for (char32_t symbol : oAlph) {
878 size_t trash = other.
getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
881 stateNames.reserve(p->transitions.size());
882 stateNames.push_back(p->initial);
883 for (
auto const& fromVia : p->transitions) {
884 if (fromVia.first != p->initial) {
885 stateNames.push_back(fromVia.first);
891 for (
size_t q = 0; q < stateNames.size(); q++) {
893 string potentialName = stateNames[q];
899 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
900 potentialName.append(
"_");
902 auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
903 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
905 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.
isAccepting(qq)) {
906 newAcc.insert(potentialName);
913 for (
size_t q = 0; q < stateNames.size(); q++) {
914 auto const& qLabel = stateNames[q];
915 auto const& viaTos = p->transitions[qLabel];
916 for (
auto const& viaTo : viaTos) {
917 size_t p =
index_of(stateNames, viaTo.second);
921 pp = other.
delta(qq, viaTo.first);
925 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
929 p->transitions = newTr;
930 p->acceptingStates = newAcc;
931 p->initial = *(pairToName[0][0]);
951 if (!p->transitions.empty()) {
953 stateNames.reserve(p->transitions.size());
954 stateNames.push_back(p->initial);
955 for (
auto const& fromTo : p->transitions) {
956 if (fromTo.first != p->initial) {
957 stateNames.push_back(fromTo.first);
961 size_t commonSymbols = 0;
962 for (char32_t symbol : p->alphabet) {
963 if (
index_of(oAlph, symbol) < oAlph.size()) {
966 for (
auto & fromVia : p->transitions) {
967 fromVia.second.erase(symbol);
971 p->alphabet.insert(oAlph.begin(), oAlph.end());
975 for (
size_t q = 0; q < stateNames.size(); q++) {
977 string potentialName = stateNames[q] + other.
getLabelOf(qq);
978 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
979 potentialName.append(
"_");
981 auto nameIt = newTr.emplace(potentialName, commonSymbols);
982 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
983 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.
isAccepting(qq)) {
984 newAcc.insert(potentialName);
988 for (
size_t q = 0; q < stateNames.size(); q++) {
989 auto const& qLabel = stateNames[q];
990 auto const& viaTos = p->transitions[qLabel];
991 for (
auto const& viaTo : viaTos) {
992 size_t p =
index_of(stateNames, viaTo.second);
994 size_t pp = other.
delta(qq, viaTo.first);
995 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
999 p->transitions = newTr;
1000 p->acceptingStates = newAcc;
1001 p->initial = *(pairToName[0][0]);
1009 for (
auto const& fromVia : p->transitions) {
1010 if (!p->acceptingStates.erase(fromVia.first)) {
1011 p->acceptingStates.insert(fromVia.first);
1019 if (!p->transitions.empty()) {
1021 stateNames.reserve(p->transitions.size());
1022 stateNames.push_back(p->initial);
1023 for (
auto const& fromTo : p->transitions) {
1024 if (fromTo.first != p->initial) {
1025 stateNames.push_back(fromTo.first);
1030 for (
size_t q = 0; q < stateNames.size(); q++) {
1031 auto const& vias = p->transitions[stateNames[q]];
1033 for (
auto const& viaTo : vias) {
1034 newVias.insert(make_pair(viaTo.first, prefix + to_string(
index_of(stateNames, viaTo.second))));
1036 newTr.insert(make_pair(prefix + to_string(q), newVias));
1037 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
1038 newAcc.insert(prefix + to_string(q));
1041 p->initial = prefix +
"0";
1042 p->transitions = newTr;
1043 p->acceptingStates = newAcc;
1054 std::sort(alphabetV.begin(), alphabetV.end());
1057 labelV.reserve(p->transitions.size());
1058 labelV.push_back(p->initial);
1059 for (
auto const& tr : p->transitions) {
1060 if (tr.first == p->initial) {
1063 labelV.push_back(tr.first);
1067 for (
size_t q(0); q < labelV.size(); q++) {
1068 string const& qLabel = labelV[q];
1069 acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
1070 for (
size_t s(0); s < alphabetV.size(); s++) {
1071 transitionV[q][s] =
index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
1074 return {alphabetV, transitionV, labelV, acceptingV};
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
std::string const & getInitialState() const
Names this DFA's initial state.
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA's language.
nfa build()
Builds the NFA, as defined by previous operations.
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
vector< char32_t > alphabet
Represents the set of processable symbols.
dfa()
Constructs a DFA accepting the empty language ∅.
Represents nondeterministic finite automata with ε-moves.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
bool operator==(dfa const &d) const
Tests whether this DFA accepts exactly the same language as another one.
std::string const & getLabelOf(size_t q) const
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Represents deterministic finite automata.
Private implementation details of DFAs.
Constructs NFAs step by step.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
vector< string > labels
Stores the names of states.
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
friend std::string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
bool operator!=(dfa const &d) const
Tests whether this DFA doesn't accept the same language as another one.
size_t getNumberOfSymbols() const
Private implementation details of DFA builders.
size_t index_of(C const &container, T const &element)
Basically Java's List interface's indexOf, but as a non-member function and returning the container's...
Contains the reg::nfa class definition.
size_t deltaHat(size_t qi, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a state spec...
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
friend std::u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
builder & complement()
Inverts the prospective DFA's language with respect to all possible strings over its alphabet...
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Constructs DFAs step by step.
size_t getNumberOfStates() const
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA's state names to consecutive numbers, prefixed with a given string...
string initial
Name of the prospective DFA's initial state.
builder()
Constructs a blank builder object.
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
Where this library lives.
unordered_set< string > acceptingStates
Set of names of the prospective DFA's accept states.
std::u32string getShortestWord() const
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
std::string getShortestUtf8Word() const
pImpl()=default
Constructs empty private implementation object.
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
dfa build()
Builds the DFA, as defined by previous operations, including completion.
std::string const & getLabelOf(size_t qi) const
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
std::shared_ptr< nfa const > equivalent
Holds an equivalent NFA in case it is ever needed.
Contains the reg::dfa class definition.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
unordered_map< string, unordered_map< char32_t, string > > Dtransitionmap
Shorthand for the map from state name and transition symbol to target state.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
dfa & operator=(dfa const &d)
Copy-assigns this DFA by copying another one's private implementation object.