7 using std::make_unique;
12 #include <unordered_set> 15 #include <unordered_map> 18 #include <forward_list> 35 for (
bool v : value) {
46 return (lhs.size() == rhs.size() && (lhs == rhs).
min() ==
true);
72 for (char32_t symbol : this->alphabet) {
90 for (
size_t p(0); p < q; p++) {
92 distinguishable[q][p] = (qAcc != pAcc);
93 distinguishable[p][q] = (qAcc != pAcc);
100 for (
size_t p(0); p < q; p++) {
101 if (distinguishable[q][p]) {
104 for (
size_t s(0); s <
transitions[q].size(); s++) {
107 if (distinguishable[qS][pS]) {
109 distinguishable[q][p] =
true;
110 distinguishable[p][q] =
true;
118 for (
size_t i(0); i < distinguishable.size(); i++) {
119 distinguishable[i] ^= allTrue;
121 return distinguishable;
133 p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
151 p.reset(
new pImpl(*(d.p)));
166 dfa::~dfa() =
default;
175 size_t unitedAlphabetSize(p->alphabet.size());
177 if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
178 unitedAlphabet.push_front(symbol);
179 unitedAlphabetSize++;
187 for (char32_t symbol : unitedAlphabet) {
189 row[s] =
delta(q, symbol);
200 for (char32_t symbol : unitedAlphabet) {
210 for (
size_t s(0); s < unitedAlphabetSize; s++) {
228 if (si >= p->alphabet.size()) {
231 if (qi >= p->labels.size()) {
234 return p->transitions[qi][si];
254 size_t dfa::delta(
size_t qi,
string const& utf8Symbol)
const {
266 string const&
dfa::delta(
string const& q, char32_t symbol)
const {
275 string const&
dfa::delta(
string const& q,
string const& utf8Symbol)
const {
288 for (char32_t symbol : word) {
289 qi =
delta(qi, symbol);
312 string const&
dfa::deltaHat(
string const& q,
string const& utf8Word)
const {
322 if (p->accepting[0]) {
327 shortestWords.emplace(0, U
"");
328 while (shortestWords.size() > oldSize) {
329 oldSize = shortestWords.size();
330 for (
auto const& stateWord : shortestWords) {
331 for (
auto symbol : p->alphabet) {
332 size_t reached =
delta(stateWord.first, symbol);
333 u32string shWord = stateWord.second + symbol;
334 if (p->accepting[reached]) {
337 if (shortestWords.find(reached) == shortestWords.end()) {
338 shortestWords.emplace(reached, shWord);
390 return p->utf8Alphabet;
416 if (qi >= p->labels.size()) {
419 return p->accepting[qi];
536 auto via = from.find(symbol);
537 if (via != from.end() && (*via).second != q) {
560 bool trashUsed(
false);
561 string const& trashState = [&]{
562 for (
auto const& entry : this->transitions) {
572 if (from.second.find(symbol) == from.second.end()) {
573 from.second[symbol] = trashState;
574 trashUsed |= (from.first != trashState);
593 p = make_unique<pImpl>(initial, alphabet, labels, transitions);
609 alphabet.erase(alphabet.begin());
613 transitions[initial];
614 p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
615 while (stackSize != 0) {
619 for (char32_t symbol : p->alphabet) {
622 auto equalToNext = [&](
valarray<bool> const& ref)->
bool{
return (next == ref).min() ==
true;};
623 if (!equalToNext(current) &&
624 none_of(stack.begin(), stack.end(), equalToNext) &&
625 none_of(done.begin(), done.end(), equalToNext)) {
626 stack.push_front(next);
630 for (
size_t qi(0); qi < current.size(); qi++) {
636 done.insert(current);
650 p.reset(
new pImpl(*(b.p)));
665 dfa::builder::~builder() =
default;
673 p->alphabet.insert(symbol);
679 return addSymbol(
converter.from_bytes(utf8Symbol)[0]);
689 if (p->transitions.empty()) {
692 p->transitions[state];
694 p->acceptingStates.insert(state);
696 p->acceptingStates.erase(state);
708 p->transitions[state];
723 if (p->transitions.empty()) {
727 p->transitions[from][symbol] = to;
734 return defineTransition(from, to,
converter.from_bytes(utf8Symbol)[0]);
752 std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
754 orderedStates.reserve(p->transitions.size());
755 orderedStates.push_back(p->initial);
756 for (
auto const& entry : p->transitions) {
757 if (entry.first != p->initial) {
758 orderedStates.push_back(entry.first);
761 for (
size_t q(0); q < orderedStates.size(); q++) {
762 string const& qLabel(orderedStates[q]);
763 if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
766 for (
size_t s(0); s < sortedAlphabet.size(); s++) {
767 tTable[q].push_back(
index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
771 for (
size_t q(0); q < orderedStates.size(); q++) {
772 for (
size_t first(0); first < equivalences[q].size(); first++) {
773 if (equivalences[q][first] && q > first) {
774 string const& superfluous(orderedStates[q]);
775 string const& remaining(orderedStates[first]);
776 p->acceptingStates.erase(superfluous);
777 p->transitions.erase(superfluous);
778 for (
auto& from : p->transitions) {
779 for (
auto& via : from.second) {
780 if (via.second == superfluous) {
781 via.second = remaining;
802 while (newReachable.size() > 0) {
804 newReachable.clear();
805 for (
string const& reachableState : oldReachable) {
806 for (
auto const& reachedState : p->transitions[reachableState]) {
807 if (reachable.insert(reachedState.second).second) {
808 newReachable.insert(reachedState.second);
813 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
814 if (reachable.find(it->first) == reachable.end()) {
815 p->acceptingStates.erase(it->first);
816 it = p->transitions.erase(it);
823 while (newProducing.size() > 0) {
825 newProducing.clear();
826 for (
auto const& from : p->transitions) {
827 for (
auto const& via : from.second) {
828 if (producing.find(via.second) != producing.end()) {
829 if (producing.insert(from.first).second) {
830 newProducing.insert(from.first);
837 for (
auto it = p->transitions.begin(); it != p->transitions.end(); ) {
838 if (producing.find(it->first) == producing.end() && it->first != p->initial) {
839 p->acceptingStates.erase(it->first);
840 for (
auto& via : p->transitions) {
841 for (
auto itv = via.second.begin(); itv != via.second.end(); ) {
842 if (itv->second == it->first) {
843 itv = via.second.erase(itv);
849 it = p->transitions.erase(it);
859 return purge().
merge();
870 if (!p->transitions.empty()) {
872 for (char32_t symbol : oAlph) {
875 size_t trash = other.
getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
878 stateNames.reserve(p->transitions.size());
879 stateNames.push_back(p->initial);
880 for (
auto const& fromVia : p->transitions) {
881 if (fromVia.first != p->initial) {
882 stateNames.push_back(fromVia.first);
888 for (
size_t q = 0; q < stateNames.size(); q++) {
890 string potentialName = stateNames[q];
896 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
897 potentialName.append(
"_");
899 auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
900 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
902 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.
isAccepting(qq)) {
903 newAcc.insert(potentialName);
910 for (
size_t q = 0; q < stateNames.size(); q++) {
911 auto const& qLabel = stateNames[q];
912 auto const& viaTos = p->transitions[qLabel];
913 for (
auto const& viaTo : viaTos) {
914 size_t p =
index_of(stateNames, viaTo.second);
918 pp = other.
delta(qq, viaTo.first);
922 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
926 p->transitions = newTr;
927 p->acceptingStates = newAcc;
928 p->initial = *(pairToName[0][0]);
948 if (!p->transitions.empty()) {
950 stateNames.reserve(p->transitions.size());
951 stateNames.push_back(p->initial);
952 for (
auto const& fromTo : p->transitions) {
953 if (fromTo.first != p->initial) {
954 stateNames.push_back(fromTo.first);
958 size_t commonSymbols = 0;
959 for (char32_t symbol : p->alphabet) {
960 if (
index_of(oAlph, symbol) < oAlph.size()) {
963 for (
auto & fromVia : p->transitions) {
964 fromVia.second.erase(symbol);
968 p->alphabet.insert(oAlph.begin(), oAlph.end());
972 for (
size_t q = 0; q < stateNames.size(); q++) {
974 string potentialName = stateNames[q] + other.
getLabelOf(qq);
975 for (
auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
976 potentialName.append(
"_");
978 auto nameIt = newTr.emplace(potentialName, commonSymbols);
979 pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
980 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.
isAccepting(qq)) {
981 newAcc.insert(potentialName);
985 for (
size_t q = 0; q < stateNames.size(); q++) {
986 auto const& qLabel = stateNames[q];
987 auto const& viaTos = p->transitions[qLabel];
988 for (
auto const& viaTo : viaTos) {
989 size_t p =
index_of(stateNames, viaTo.second);
991 size_t pp = other.
delta(qq, viaTo.first);
992 newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
996 p->transitions = newTr;
997 p->acceptingStates = newAcc;
998 p->initial = *(pairToName[0][0]);
1006 for (
auto const& fromVia : p->transitions) {
1007 if (!p->acceptingStates.erase(fromVia.first)) {
1008 p->acceptingStates.insert(fromVia.first);
1016 if (!p->transitions.empty()) {
1018 stateNames.reserve(p->transitions.size());
1019 stateNames.push_back(p->initial);
1020 for (
auto const& fromTo : p->transitions) {
1021 if (fromTo.first != p->initial) {
1022 stateNames.push_back(fromTo.first);
1027 for (
size_t q = 0; q < stateNames.size(); q++) {
1028 auto const& vias = p->transitions[stateNames[q]];
1030 for (
auto const& viaTo : vias) {
1031 newVias.insert(make_pair(viaTo.first, prefix + to_string(
index_of(stateNames, viaTo.second))));
1033 newTr.insert(make_pair(prefix + to_string(q), newVias));
1034 if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
1035 newAcc.insert(prefix + to_string(q));
1038 p->initial = prefix +
"0";
1039 p->transitions = newTr;
1040 p->acceptingStates = newAcc;
1051 std::sort(alphabetV.begin(), alphabetV.end());
1054 labelV.reserve(p->transitions.size());
1055 labelV.push_back(p->initial);
1056 for (
auto const& tr : p->transitions) {
1057 if (tr.first == p->initial) {
1060 labelV.push_back(tr.first);
1064 for (
size_t q(0); q < labelV.size(); q++) {
1065 string const& qLabel = labelV[q];
1066 acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
1067 for (
size_t s(0); s < alphabetV.size(); s++) {
1068 transitionV[q][s] =
index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
1071 return {alphabetV, transitionV, labelV, acceptingV};
1081 return static_cast<size_t>(std::distance(vec.begin(), find(vec.begin(), vec.end(), element)));
bool operator!=(const dfa &d) const
Tests whether this DFA doesn't accept the same language as another one.
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
std::string const & getInitialState() const
Names this DFA's initial state.
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA's language.
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
vector< char32_t > alphabet
Represents the set of processable symbols.
dfa()
Constructs a DFA accepting the empty language ∅.
Represents nondeterministic finite automata with ε-moves.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Represents deterministic finite automata.
Private implementation details of DFAs.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
vector< string > labels
Stores the names of states.
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
dfa & operator=(const dfa &d)
Copy-assigns this DFA by copying another one's private implementation object.
size_t getNumberOfSymbols() const
Counts this DFA's alphabet symbols.
Private implementation details of DFA builders.
Contains the reg::nfa class definition.
size_t deltaHat(size_t qi, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a state spec...
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Contains the reg::expression class defintion.
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
size_t index_of(vector< T > const &vec, T const &element)
Basically Java's List interface's indexOf, but as a non-member function and returning the container's...
bool operator==(const dfa &d) const
Tests whether this DFA accepts exactly the same language as another one.
builder & complement()
Inverts the prospective DFA's language with respect to all possible strings over its alphabet...
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Constructs DFAs step by step.
size_t getNumberOfStates() const
Counts this DFA's states.
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA's state names to consecutive numbers, prefixed with a given string...
string initial
Name of the prospective DFA's initial state.
builder()
Constructs a blank builder object.
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
Where this library lives.
unordered_set< string > acceptingStates
Set of names of the prospective DFA's accept states.
std::u32string getShortestWord() const
Searches the shortest UTF-32-encoded word accepted by this DFA.
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
std::string getShortestUtf8Word() const
Same as above for a UTF-8-encoded word.
pImpl()=default
Constructs empty private implementation object.
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
dfa build()
Builds the DFA, as defined by previous operations, including completion.
std::string const & getLabelOf(size_t qi) const
Puts a name to an index.
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
Contains the reg::dfa class definition.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
unordered_map< string, unordered_map< char32_t, string > > Dtransitionmap
Shorthand for the map from state name and transition symbol to target state.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.