7 using std::make_unique;
12 #include <unordered_set> 15 #include <unordered_map> 18 #include <forward_list> 35 for (
bool v : value) {
46 return (lhs.size() == rhs.size() && (lhs == rhs).
min() ==
true);
72 for (char32_t symbol : this->alphabet) {
90 for (
size_t p(0); p < q; p++) {
92 distinguishable[q][p] = (qAcc != pAcc);
93 distinguishable[p][q] = (qAcc != pAcc);
100 for (
size_t p(0); p < q; p++) {
101 if (distinguishable[q][p]) {
104 for (
size_t s(0); s <
transitions[q].size(); s++) {
107 if (distinguishable[qS][pS]) {
109 distinguishable[q][p] =
true;
110 distinguishable[p][q] =
true;
118 for (
size_t i(0); i < distinguishable.size(); i++) {
119 distinguishable[i] ^= allTrue;
121 return distinguishable;
133 p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
151 p.reset(
new pImpl(*(d.p)));
166 dfa::~dfa() =
default;
175 size_t unitedAlphabetSize(p->alphabet.size());
177 if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
178 unitedAlphabet.push_front(symbol);
179 unitedAlphabetSize++;
187 for (char32_t symbol : unitedAlphabet) {
189 row[s] =
delta(q, symbol);
200 for (char32_t symbol : unitedAlphabet) {
210 for (
size_t s(0); s < unitedAlphabetSize; s++) {
228 if (si >= p->alphabet.size()) {
231 if (qi >= p->labels.size()) {
234 return p->transitions[qi][si];
254 size_t dfa::delta(
size_t qi,
string const& utf8Symbol)
const {
266 string const&
dfa::delta(
string const& q, char32_t symbol)
const {
275 string const&
dfa::delta(
string const& q,
string const& utf8Symbol)
const {
288 for (char32_t symbol : word) {
289 qi =
delta(qi, symbol);
312 string const&
dfa::deltaHat(
string const& q,
string const& utf8Word)
const {
360 return p->utf8Alphabet;
380 if (qi >= p->labels.size()) {
383 return p->accepting[qi];
408 if (p->accepting[0]) {
413 shortestWords.emplace(0, U
"");
414 while (shortestWords.size() > oldSize) {
415 oldSize = shortestWords.size();
416 for (
auto const& stateWord : shortestWords) {
417 for (
auto symbol : p->alphabet) {
418 size_t reached = d.
delta(stateWord.first, symbol);
419 u32string shWord = stateWord.second + symbol;
420 if (p->accepting[reached]) {
423 if (shortestWords.find(reached) == shortestWords.end()) {
424 shortestWords.emplace(reached, shWord);
522 auto via = from.find(symbol);
523 if (via != from.end() && (*via).second != q) {
546 bool trashUsed(
false);
547 string const& trashState = [&]{
548 for (
auto const& entry : this->transitions) {
558 if (from.second.find(symbol) == from.second.end()) {
559 from.second[symbol] = trashState;
560 trashUsed |= (from.first != trashState);
579 p = make_unique<pImpl>(initial, alphabet, labels, transitions);
595 alphabet.erase(alphabet.begin());
599 transitions[initial];
600 p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
601 while (stackSize != 0) {
605 for (char32_t symbol : p->alphabet) {
608 auto equalToNext = [&](
valarray<bool> const& ref)->
bool{
return (next == ref).min() ==
true;};
609 if (!equalToNext(current) &&
610 none_of(stack.begin(), stack.end(), equalToNext) &&
611 none_of(done.begin(), done.end(), equalToNext)) {
612 stack.push_front(next);
616 for (
size_t qi(0); qi < current.size(); qi++) {
622 done.insert(current);
636 p.reset(
new pImpl(*(b.p)));
651 dfa::builder::~builder() =
default;
659 p->alphabet.insert(symbol);
665 return addSymbol(
converter.from_bytes(utf8Symbol)[0]);
675 if (p->transitions.empty()) {
678 p->transitions[state];
680 p->acceptingStates.insert(state);
682 p->acceptingStates.erase(state);
694 p->transitions[state];
709 if (p->transitions.empty()) {
713 p->transitions[from][symbol] = to;
720 return defineTransition(from, to,
converter.from_bytes(utf8Symbol)[0]);
738 std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
740 orderedStates.reserve(p->transitions.size());
741 orderedStates.push_back(p->initial);
742 for (
auto const& entry : p->transitions) {
743 if (entry.first != p->initial) {
744 orderedStates.push_back(entry.first);
747 for (
size_t q(0); q < orderedStates.size(); q++) {
748 string const& qLabel(orderedStates[q]);
749 if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
752 for (
size_t s(0); s < sortedAlphabet.size(); s++) {
753 tTable[q].push_back(
index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
757 for (
size_t q(0); q < orderedStates.size(); q++) {
758 for (
size_t first(0); first < equivalences[q].size(); first++) {
759 if (equivalences[q][first] && q > first) {
760 string const& superfluous(orderedStates[q]);
761 string const& remaining(orderedStates[first]);
762 p->acceptingStates.erase(superfluous);
763 p->transitions.erase(superfluous);
764 for (
auto& from : p->transitions) {
765 for (
auto& via : from.second) {
766 if (via.second == superfluous) {
767 via.second = remaining;
788 while (newReachable.size() > 0) {
790 newReachable.clear();
791 for (
string const& reachableState : oldReachable) {
792 for (
auto const& reachedState : p->transitions[reachableState]) {
793 if (reachable.insert(reachedState.second).second) {
794 newReachable.insert(reachedState.second);
799 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
800 if (reachable.find(it->first) == reachable.end()) {
801 p->acceptingStates.erase(it->first);
802 it = p->transitions.erase(it);
809 while (newProducing.size() > 0) {
811 newProducing.clear();
812 for (
auto const& from : p->transitions) {
813 for (
auto const& via : from.second) {
814 if (producing.find(via.second) != producing.end()) {
815 if (producing.insert(from.first).second) {
816 newProducing.insert(from.first);
823 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
824 if (producing.find(it->first) == producing.end() && it->first != p->initial) {
825 p->acceptingStates.erase(it->first);
826 for (
auto& via : p->transitions) {
827 for (
auto itv = via.second.begin(); itv != via.second.end(); ) {
828 if (itv->second == it->first) {
829 itv = via.second.erase(itv);
835 it = p->transitions.erase(it);
845 return purge().
merge();
856 if (!p->transitions.empty()) {
858 for (char32_t symbol : oAlph) {
861 size_t trash = other.
getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
864 stateNames.reserve(p->transitions.size());
865 stateNames.push_back(p->initial);
866 for (
auto const& fromVia : p->transitions) {
867 if (fromVia.first != p->initial) {
868 stateNames.push_back(fromVia.first);
874 for (
size_t q = 0; q < stateNames.size(); q++) {
876 string potentialName = stateNames[q];
882 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
883 potentialName.append(
"_");
885 auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
886 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
888 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.
isAccepting(qq)) {
889 newAcc.insert(potentialName);
896 for (
size_t q = 0; q < stateNames.size(); q++) {
897 auto const& qLabel = stateNames[q];
898 auto const& viaTos = p->transitions[qLabel];
899 for (
auto const& viaTo : viaTos) {
900 size_t p =
index_of(stateNames, viaTo.second);
904 pp = other.
delta(qq, viaTo.first);
908 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
912 p->transitions = newTr;
913 p->acceptingStates = newAcc;
914 p->initial = *(pairToName[0][0]);
934 if (!p->transitions.empty()) {
936 stateNames.reserve(p->transitions.size());
937 stateNames.push_back(p->initial);
938 for (
auto const& fromTo : p->transitions) {
939 if (fromTo.first != p->initial) {
940 stateNames.push_back(fromTo.first);
944 size_t commonSymbols = 0;
945 for (char32_t symbol : p->alphabet) {
946 if (
index_of(oAlph, symbol) < oAlph.size()) {
949 for (
auto & fromVia : p->transitions) {
950 fromVia.second.erase(symbol);
954 p->alphabet.insert(oAlph.begin(), oAlph.end());
958 for (
size_t q = 0; q < stateNames.size(); q++) {
960 string potentialName = stateNames[q] + other.
getLabelOf(qq);
961 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
962 potentialName.append(
"_");
964 auto nameIt = newTr.emplace(potentialName, commonSymbols);
965 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
966 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.
isAccepting(qq)) {
967 newAcc.insert(potentialName);
971 for (
size_t q = 0; q < stateNames.size(); q++) {
972 auto const& qLabel = stateNames[q];
973 auto const& viaTos = p->transitions[qLabel];
974 for (
auto const& viaTo : viaTos) {
975 size_t p =
index_of(stateNames, viaTo.second);
977 size_t pp = other.
delta(qq, viaTo.first);
978 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
982 p->transitions = newTr;
983 p->acceptingStates = newAcc;
984 p->initial = *(pairToName[0][0]);
992 for (
auto const& fromVia : p->transitions) {
993 if (!p->acceptingStates.erase(fromVia.first)) {
994 p->acceptingStates.insert(fromVia.first);
1002 if (!p->transitions.empty()) {
1004 stateNames.reserve(p->transitions.size());
1005 stateNames.push_back(p->initial);
1006 for (
auto const& fromTo : p->transitions) {
1007 if (fromTo.first != p->initial) {
1008 stateNames.push_back(fromTo.first);
1013 for (
size_t q = 0; q < stateNames.size(); q++) {
1014 auto const& vias = p->transitions[stateNames[q]];
1016 for (
auto const& viaTo : vias) {
1017 newVias.insert(make_pair(viaTo.first, prefix + to_string(
index_of(stateNames, viaTo.second))));
1019 newTr.insert(make_pair(prefix + to_string(q), newVias));
1020 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
1021 newAcc.insert(prefix + to_string(q));
1024 p->initial = prefix +
"0";
1025 p->transitions = newTr;
1026 p->acceptingStates = newAcc;
1037 std::sort(alphabetV.begin(), alphabetV.end());
1040 labelV.reserve(p->transitions.size());
1041 labelV.push_back(p->initial);
1042 for (
auto const& tr : p->transitions) {
1043 if (tr.first == p->initial) {
1046 labelV.push_back(tr.first);
1050 for (
size_t q(0); q < labelV.size(); q++) {
1051 string const& qLabel = labelV[q];
1052 acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
1053 for (
size_t s(0); s < alphabetV.size(); s++) {
1054 transitionV[q][s] =
index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
1057 return {alphabetV, transitionV, labelV, acceptingV};
bool operator!=(const dfa &d) const
Tests whether this DFA doesn't accept the same language as another one.
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
std::string const & getInitialState() const
Names this DFA's initial state.
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA's language.
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
vector< char32_t > alphabet
Represents the set of processable symbols.
dfa()
Constructs a DFA accepting the empty language ∅.
Represents nondeterministic finite automata with ε-moves.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
std::string const & getLabelOf(size_t q) const
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Represents deterministic finite automata.
Private implementation details of DFAs.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
vector< string > labels
Stores the names of states.
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
friend std::string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
dfa & operator=(const dfa &d)
Copy-assigns this DFA by copying another one's private implementation object.
size_t getNumberOfSymbols() const
Private implementation details of DFA builders.
size_t index_of(C const &container, T const &element)
Basically Java's List interface's indexOf, but as a non-member function and returning the container's...
Contains the reg::nfa class definition.
size_t deltaHat(size_t qi, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a state spec...
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Contains the reg::expression class defintion.
friend std::u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
bool operator==(const dfa &d) const
Tests whether this DFA accepts exactly the same language as another one.
builder & complement()
Inverts the prospective DFA's language with respect to all possible strings over its alphabet...
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Constructs DFAs step by step.
size_t getNumberOfStates() const
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA's state names to consecutive numbers, prefixed with a given string...
string initial
Name of the prospective DFA's initial state.
builder()
Constructs a blank builder object.
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
Where this library lives.
unordered_set< string > acceptingStates
Set of names of the prospective DFA's accept states.
std::u32string getShortestWord() const
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
std::string getShortestUtf8Word() const
pImpl()=default
Constructs empty private implementation object.
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
dfa build()
Builds the DFA, as defined by previous operations, including completion.
std::string const & getLabelOf(size_t qi) const
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
Contains the reg::dfa class definition.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
unordered_map< string, unordered_map< char32_t, string > > Dtransitionmap
Shorthand for the map from state name and transition symbol to target state.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.