6 using std::make_unique;
11 #include <unordered_set> 12 using std::unordered_set;
14 #include <unordered_map> 15 using std::unordered_map;
17 #include <forward_list> 18 using std::forward_list;
50 for (char32_t symbol : this->alphabet) {
63 vector<valarray<bool>> distinguishable;
67 distinguishable.push_back(valarray<bool>(
false,
transitions.size()));
68 for (
size_t p(0); p < q; p++) {
70 distinguishable[q][p] = (qAcc != pAcc);
71 distinguishable[p][q] = (qAcc != pAcc);
78 for (
size_t p(0); p < q; p++) {
79 if (distinguishable[q][p]) {
85 if (distinguishable[qS][pS]) {
87 distinguishable[q][p] =
true;
88 distinguishable[p][q] =
true;
96 for (
size_t i(0); i < distinguishable.size(); i++) {
97 distinguishable[i] ^= allTrue;
99 return distinguishable;
107 dfa::dfa(vector<char32_t>& alphabet,
108 vector<vector<size_t>>& transitions,
109 vector<string>& labels,
110 valarray<bool>& acceptingStates) :
111 p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
129 p.reset(
new pImpl(*(d.p)));
144 dfa::~dfa() =
default;
152 forward_list<char32_t> unitedAlphabet(p->alphabet.begin(), p->alphabet.end());
153 size_t unitedAlphabetSize(p->alphabet.size());
155 if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
156 unitedAlphabet.push_front(symbol);
157 unitedAlphabetSize++;
164 vector<size_t>& row = tTable[q];
165 for (char32_t symbol : unitedAlphabet) {
167 row[s] =
delta(q, symbol);
168 }
catch (std::domain_error e) {
178 for (char32_t symbol : unitedAlphabet) {
181 }
catch (std::domain_error e) {
188 for (
size_t s(0); s < unitedAlphabetSize; s++) {
204 size_t i =
index_of(p->alphabet, symbol);
205 if (i >= p->alphabet.size()) {
206 throw std::domain_error(
207 string(u8
"δ is not defined for symbol ") +
converter.to_bytes(symbol)
210 if (q >= p->labels.size()) {
211 throw std::out_of_range(
212 string(
"There is no state with index ").append(std::to_string(q))
215 return p->transitions[q][i];
219 size_t dfa::delta(
size_t q,
string const& utf8Symbol)
const {
230 for (char32_t symbol : word) {
231 q =
delta(q, symbol);
255 if (p->accepting[0]) {
258 unordered_map<size_t, u32string> shortestWords(p->labels.size());
259 shortestWords.emplace(0, U
"");
261 for (
auto const& stateWord : shortestWords) {
262 for (
auto symbol : p->alphabet) {
263 size_t reached =
delta(stateWord.first, symbol);
264 u32string shWord = stateWord.second + symbol;
265 if (p->accepting[reached]) {
268 if (shortestWords.find(reached) == shortestWords.end()) {
269 shortestWords.emplace(reached, shWord);
287 return p->labels.at(q);
303 return p->utf8Alphabet;
311 return p->labels.size();
320 if (q >= p->labels.size()) {
321 throw std::out_of_range(
322 string(
"There is no state with index ").append(std::to_string(q))
325 return p->accepting[q];
380 using Dtransitionmap = unordered_map<string, unordered_map<char32_t, string>>;
412 auto via = from.find(symbol);
413 if (via != from.end() && (*via).second != q) {
430 return transitions.insert(std::make_pair(newState, unordered_map<char32_t, string>())).first->first;
436 bool trashUsed(
false);
437 string const& trashState = [&]{
438 for (
auto const& entry : this->transitions) {
448 if (from.second.find(symbol) == from.second.end()) {
449 from.second[symbol] = trashState;
450 trashUsed |= (from.first != trashState);
461 size_t operator()(valarray<bool>
const& value)
const {
463 for (
bool v : value) {
473 bool fullTrue =
true;
482 valarray<bool>
const& val;
486 bool operator()(valarray<bool>
const& other)
const {
493 bool operator()(valarray<bool>
const& a, valarray<bool>
const& b)
const {
508 p = make_unique<pImpl>(initial, alphabet, labels, transitions);
524 alphabet.erase(alphabet.begin());
525 unordered_set<char32_t> alphabetS(alphabet.begin(), alphabet.end());
526 unordered_set<string> acceptingStates;
527 Dtransitionmap transitions;
528 transitions[initial];
529 p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
530 while (stackSize != 0) {
531 valarray<bool> current(move(stack.front()));
534 for (char32_t symbol : p->alphabet) {
535 valarray<bool> next(
nfa.
deltaHat(current, u32string(1, symbol)));
538 if (!equalToNext(current) &&
539 none_of(stack.begin(), stack.end(), equalToNext) &&
540 none_of(done.begin(), done.end(), equalToNext)) {
541 stack.push_front(next);
545 for (
size_t qi(0); qi < current.size(); qi++) {
551 done.insert(current);
565 p.reset(
new pImpl(*(b.p)));
580 dfa::builder::~builder() =
default;
588 p->alphabet.insert(symbol);
594 return addSymbol(
converter.from_bytes(utf8Symbol)[0]);
604 if (p->transitions.empty()) {
607 p->transitions[state];
609 p->acceptingStates.insert(state);
611 p->acceptingStates.erase(state);
623 p->transitions[state];
638 if (p->transitions.empty()) {
642 p->transitions[from][symbol] = to;
649 return defineTransition(from, to,
converter.from_bytes(utf8Symbol)[0]);
664 vector<vector<size_t>> tTable(p->transitions.size());
665 valarray<bool> accepting(
false, p->transitions.size());
666 vector<char32_t> sortedAlphabet(p->alphabet.begin(), p->alphabet.end());
667 std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
668 vector<string> orderedStates;
669 orderedStates.reserve(p->transitions.size());
670 orderedStates.push_back(p->initial);
671 for (
auto const& entry : p->transitions) {
672 if (entry.first != p->initial) {
673 orderedStates.push_back(entry.first);
676 for (
size_t q(0); q < orderedStates.size(); q++) {
677 string const& qLabel(orderedStates[q]);
678 if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
681 for (
size_t s(0); s < sortedAlphabet.size(); s++) {
682 tTable[q].push_back(
index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
686 for (
size_t q(0); q < orderedStates.size(); q++) {
687 for (
size_t first(0); first < equivalences[q].size(); first++) {
688 if (equivalences[q][first] && q > first) {
689 string const& superfluous(orderedStates[q]);
690 string const& remaining(orderedStates[first]);
691 p->acceptingStates.erase(superfluous);
692 p->transitions.erase(superfluous);
693 for (
auto& from : p->transitions) {
694 for (
auto& via : from.second) {
695 if (via.second == superfluous) {
696 via.second = remaining;
715 unordered_set<string> reachable(&(p->initial), &(p->initial)+1);
716 unordered_set<string> newReachable(reachable);
717 while (newReachable.size() > 0) {
718 unordered_set<string> oldReachable(move(newReachable));
719 newReachable.clear();
720 for (
string const& reachableState : oldReachable) {
721 for (
auto const& reachedState : p->transitions[reachableState]) {
722 if (reachable.insert(reachedState.second).second) {
723 newReachable.insert(reachedState.second);
728 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
729 if (reachable.find(it->first) == reachable.end()) {
730 p->acceptingStates.erase(it->first);
731 it = p->transitions.erase(it);
736 unordered_set<string> producing(p->acceptingStates);
737 unordered_set<string> newProducing(producing);
738 while (newProducing.size() > 0) {
739 unordered_set<string> oldProducing(move(newProducing));
740 newProducing.clear();
741 for (
auto const& from : p->transitions) {
742 for (
auto const& via : from.second) {
743 if (producing.find(via.second) != producing.end()) {
744 if (producing.insert(from.first).second) {
745 newProducing.insert(from.first);
752 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
753 if (producing.find(it->first) == producing.end() && it->first != p->initial) {
754 p->acceptingStates.erase(it->first);
755 for (
auto& via : p->transitions) {
756 for (
auto itv = via.second.begin(); itv != via.second.end(); ) {
757 if (itv->second == it->first) {
758 itv = via.second.erase(itv);
764 it = p->transitions.erase(it);
774 return purge().
merge();
785 if (!p->transitions.empty()) {
787 for (char32_t symbol : oAlph) {
790 size_t trash = other.
getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
792 vector<string> stateNames;
793 stateNames.reserve(p->transitions.size());
794 stateNames.push_back(p->initial);
795 for (
auto const& fromVia : p->transitions) {
796 if (fromVia.first != p->initial) {
797 stateNames.push_back(fromVia.first);
802 unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.
getNumberOfStates());
803 for (
size_t q = 0; q < stateNames.size(); q++) {
805 string potentialName = stateNames[q];
808 }
catch (std::out_of_range e) {
811 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
812 potentialName.append(
"_");
814 auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
815 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
817 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.
isAccepting(qq)) {
818 newAcc.insert(potentialName);
820 }
catch (std::out_of_range e) {
825 for (
size_t q = 0; q < stateNames.size(); q++) {
826 auto const& qLabel = stateNames[q];
827 auto const& viaTos = p->transitions[qLabel];
828 for (
auto const& viaTo : viaTos) {
829 size_t p =
index_of(stateNames, viaTo.second);
833 pp = other.
delta(qq, viaTo.first);
834 }
catch (std::logic_error e) {
837 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
841 p->transitions = newTr;
842 p->acceptingStates = newAcc;
843 p->initial = *(pairToName[0][0]);
863 if (!p->transitions.empty()) {
864 vector<string> stateNames;
865 stateNames.reserve(p->transitions.size());
866 stateNames.push_back(p->initial);
867 for (
auto const& fromTo : p->transitions) {
868 if (fromTo.first != p->initial) {
869 stateNames.push_back(fromTo.first);
873 size_t commonSymbols = 0;
874 for (char32_t symbol : p->alphabet) {
875 if (
index_of(oAlph, symbol) < oAlph.size()) {
878 for (
auto & fromVia : p->transitions) {
879 fromVia.second.erase(symbol);
883 p->alphabet.insert(oAlph.begin(), oAlph.end());
886 unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.
getNumberOfStates());
887 for (
size_t q = 0; q < stateNames.size(); q++) {
889 string potentialName = stateNames[q] + other.
getLabelOf(qq);
890 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
891 potentialName.append(
"_");
893 auto nameIt = newTr.emplace(potentialName, commonSymbols);
894 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
895 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.
isAccepting(qq)) {
896 newAcc.insert(potentialName);
900 for (
size_t q = 0; q < stateNames.size(); q++) {
901 auto const& qLabel = stateNames[q];
902 auto const& viaTos = p->transitions[qLabel];
903 for (
auto const& viaTo : viaTos) {
904 size_t p =
index_of(stateNames, viaTo.second);
906 size_t pp = other.
delta(qq, viaTo.first);
907 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
911 p->transitions = newTr;
912 p->acceptingStates = newAcc;
913 p->initial = *(pairToName[0][0]);
921 for (
auto const& fromVia : p->transitions) {
922 if (!p->acceptingStates.erase(fromVia.first)) {
923 p->acceptingStates.insert(fromVia.first);
931 if (!p->transitions.empty()) {
932 vector<string> stateNames;
933 stateNames.reserve(p->transitions.size());
934 stateNames.push_back(p->initial);
935 for (
auto const& fromTo : p->transitions) {
936 if (fromTo.first != p->initial) {
937 stateNames.push_back(fromTo.first);
940 Dtransitionmap newTr(p->transitions.size());
941 unordered_set<string> newAcc(p->acceptingStates.size());
942 for (
size_t q = 0; q < stateNames.size(); q++) {
943 auto const& vias = p->transitions[stateNames[q]];
944 unordered_map<char32_t,string> newVias(vias.size());
945 for (
auto const& viaTo : vias) {
946 newVias.insert(make_pair(viaTo.first,
string(prefix).append(std::to_string(
index_of(stateNames, viaTo.second)))));
948 newTr.insert(make_pair(
string(prefix).append(std::to_string(q)), newVias));
949 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
950 newAcc.insert(
string(prefix).append(std::to_string(q)));
953 p->initial = string(
string(prefix).append(
"0"));
954 p->transitions = newTr;
955 p->acceptingStates = newAcc;
965 vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
966 std::sort(alphabetV.begin(), alphabetV.end());
968 vector<string> labelV;
969 labelV.reserve(p->transitions.size());
970 labelV.push_back(p->initial);
971 for (
auto const& tr : p->transitions) {
972 if (tr.first == p->initial) {
975 labelV.push_back(tr.first);
977 valarray<bool> acceptingV(
false, labelV.size());
978 vector<vector<size_t>> transitionV(labelV.size(), vector<size_t>(alphabetV.size()));
979 for (
size_t q(0); q < labelV.size(); q++) {
980 string const& qLabel = labelV[q];
981 acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
982 for (
size_t s(0); s < alphabetV.size(); s++) {
983 transitionV[q][s] =
index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
986 return {alphabetV, transitionV, labelV, acceptingV};
995 template<
class T>
size_t index_of(vector<T>
const& vec, T
const& element) {
996 return static_cast<size_t>(std::distance(vec.begin(), find(vec.begin(), vec.end(), element)));
999 template size_t index_of(vector<char32_t>
const& vec, char32_t
const& element);
1001 std::wstring_convert<std::codecvt_utf8<char32_t>,char32_t>
converter;
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this DFA.
bool operator!=(const dfa &d) const
Tests whether this DFA doesn't accept the same language as another one.
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
Comparison object for valarray<bool>s against a specific instance.
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA's language.
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
size_t delta(size_t q, char32_t symbol) const
Computes this DFA's transition function for a state and a symbol.
vector< char32_t > alphabet
Represents the set of processable symbols.
dfa()
Constructs a DFA accepting the empty language ∅.
Represents nondeterministic finite automata with ε-moves.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Comparison object for valarray<bool>s, so they can be used as unordered_set keys. ...
Represents deterministic finite automata.
Private implementation details of DFAs.
std::valarray< bool > deltaHat(size_t q, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a specified ...
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
vector< string > labels
Stores the names of states.
Hasher object for valarray<bool>s, so they can be used as unordered_set keys.
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this NFA.
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
dfa & operator=(const dfa &d)
Copy-assigns this DFA by copying another one's private implementation object.
Private implementation details of DFA builders.
Contains the reg::nfa class definition.
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Contains the reg::expression class defintion.
static bool valarrayFullTrue(valarray< bool > const &a)
Tests whether all of a valarray<bool>'s entries are true.
bool operator==(const dfa &d) const
Tests whether this DFA accepts exactly the same language as another one.
builder & complement()
Inverts the prospective DFA's language with respect to all possible strings over its alphabet...
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Constructs DFAs step by step.
size_t deltaHat(size_t q, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a specified ...
size_t getNumberOfStates() const
Counts this DFA's states.
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA's state names to consecutive numbers, prefixed with a given string...
string initial
Name of the prospective DFA's initial state.
builder()
Constructs a blank builder object.
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
unordered_set< string > acceptingStates
Set of names of the prospective DFA's accept states.
size_t index_of(vector< T > const &vec, T const &element)
Basically Java's List interface's indexOf, but as a non-member function and returning the container's...
std::u32string getShortestWord() const
Searches the shortest UTF-32-encoded word accepted by this DFA.
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
std::string getShortestUtf8Word() const
Same as above for a UTF-8-encoded word, INCLUDING POTENTIAL INFINITE LOOP.
pImpl()=default
Constructs empty private implementation object.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
dfa build()
Builds the DFA, as defined by previous operations, including completion.
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
std::valarray< bool > const & epsilonClosure(size_t q) const
Computes a state's ε-closure.
Contains the reg::dfa class definition.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.