reglibcpp  1.5.0
(Naïve) C++ implementation of models for regular languages
dfa.cpp
Go to the documentation of this file.
1 #include "dfa.h"
3 using std::valarray;
4 using std::vector;
5 using std::string;
6 using std::to_string;
7 using std::make_unique;
8 using std::u32string;
9 
10 using std::move;
11 
12 #include <unordered_set>
13 using std::unordered_set;
14 
15 #include <unordered_map>
16 using std::unordered_map;
17 
18 #include <forward_list>
19 using std::forward_list;
20 
21 #include <algorithm>
22 using std::find;
23 using std::none_of;
24 
25 #include <cstdint>
26 
27 #include "nfa.h"
28 #include "expression.h"
29 
30 namespace std {
31  template<>
32  struct hash<valarray<bool>> {
33  size_t operator()(valarray<bool> const& value) const {
34  size_t hash = 0;
35  for (bool v : value) {
36  hash <<= 1;
37  hash |= v;
38  }
39  return hash;
40  }
41  };
42 
43  template<>
44  struct equal_to<valarray<bool>> {
45  bool operator()(valarray<bool> const& lhs, valarray<bool> const& rhs) const {
46  return (lhs.size() == rhs.size() && (lhs == rhs).min() == true);
47  }
48  };
49 }
50 
51 namespace reg {
52 
54 struct dfa::pImpl {
60 
62  pImpl() : accepting(false, 1), alphabet(0), utf8Alphabet(0), labels(1), transitions(1) {}
63 
67  accepting(move(accepting)),
68  alphabet(move(alphabet)),
69  labels(move(labels)),
70  transitions(move(transitions)) {
71  utf8Alphabet.reserve(this->alphabet.size());
72  for (char32_t symbol : this->alphabet) {
73  utf8Alphabet.push_back(converter.to_bytes(symbol));
74  }
75  }
76 
78 
85  vector<valarray<bool>> distinguishable;
86  distinguishable.reserve(transitions.size());
87  for (size_t q(0); q < transitions.size(); q++) {
88  bool qAcc(accepting[q]);
89  distinguishable.push_back(valarray<bool>(false, transitions.size()));
90  for (size_t p(0); p < q; p++) {
91  bool pAcc(accepting[p]);
92  distinguishable[q][p] = (qAcc != pAcc);
93  distinguishable[p][q] = (qAcc != pAcc);
94  }
95  }
96  bool changes(true);
97  while (changes) {
98  changes = false;
99  for (size_t q(0); q < transitions.size(); q++) {
100  for (size_t p(0); p < q; p++) {
101  if (distinguishable[q][p]) {
102  continue;
103  }
104  for (size_t s(0); s < transitions[q].size(); s++) {
105  size_t qS(transitions[q][s]);
106  size_t pS(transitions[p][s]);
107  if (distinguishable[qS][pS]) {
108  changes = true;
109  distinguishable[q][p] = true;
110  distinguishable[p][q] = true;
111  break;
112  }
113  }
114  }
115  }
116  }
117  valarray<bool> allTrue(true, transitions.size());
118  for (size_t i(0); i < distinguishable.size(); i++) {
119  distinguishable[i] ^= allTrue;
120  }
121  return distinguishable;
122  }
123 };
124 
126 dfa::dfa() : p(new pImpl) {}
127 
129 dfa::dfa(vector<char32_t>& alphabet,
130  vector<vector<size_t>>& transitions,
131  vector<string>& labels,
132  valarray<bool>& acceptingStates) :
133  p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
134 
136 dfa::dfa(dfa const& d) : p(new pImpl(*(d.p))) {}
137 
139 
140 dfa::dfa(dfa&& d) : p(new pImpl) {p.swap(d.p);}
141 
143 dfa::dfa(nfa const& n) : dfa(builder(n).build()) {}
144 
146 dfa::dfa(dfa::builder& b) : dfa(b.build()) {}
147 
149 dfa& dfa::operator=(const dfa& d) {
150  if (this != &d) {
151  p.reset(new pImpl(*(d.p)));
152  }
153  return *this;
154 }
155 
157 
159  if (this != &d) {
160  p.reset(new pImpl);
161  p.swap(d.p);
162  }
163  return *this;
164 }
165 
166 dfa::~dfa() = default;
167 
169 
173 bool dfa::operator==(dfa const& d) const {
174  forward_list<char32_t> unitedAlphabet(p->alphabet.begin(), p->alphabet.end());
175  size_t unitedAlphabetSize(p->alphabet.size());
176  for (char32_t symbol : d.getAlphabet()) {
177  if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
178  unitedAlphabet.push_front(symbol);
179  unitedAlphabetSize++;
180  }
181  }
182  vector<vector<size_t>> tTable(getNumberOfStates()+d.getNumberOfStates()+1, vector<size_t>(unitedAlphabetSize));
183  valarray<bool> accepting(false, getNumberOfStates()+d.getNumberOfStates()+1);
184  for (size_t q(0); q < getNumberOfStates(); q++) {
185  size_t s(0);
186  vector<size_t>& row = tTable[q];
187  for (char32_t symbol : unitedAlphabet) {
188  try {
189  row[s] = delta(q, symbol);
190  } catch (std::domain_error e) {
191  row[s] = getNumberOfStates()+d.getNumberOfStates();
192  }
193  s++;
194  }
195  accepting[q] = isAccepting(q);
196  }
197  for (size_t q(0); q < d.getNumberOfStates(); q++) {
198  size_t s(0);
199  vector<size_t>& row = tTable[q+getNumberOfStates()];
200  for (char32_t symbol : unitedAlphabet) {
201  try {
202  row[s] = getNumberOfStates()+d.delta(q, symbol);
203  } catch (std::domain_error e) {
204  row[s] = getNumberOfStates()+d.getNumberOfStates();
205  }
206  s++;
207  }
208  accepting[q+getNumberOfStates()] = d.isAccepting(q);
209  }
210  for (size_t s(0); s < unitedAlphabetSize; s++) {
212  }
213  return pImpl::indistinguishableStates(tTable, accepting)[0][getNumberOfStates()];
214 }
215 
217 bool dfa::operator!=(dfa const& d) const {return !operator==(d);}
218 
220 
227 size_t dfa::delta(size_t qi, size_t si) const {
228  if (si >= p->alphabet.size()) {
229  throw std::domain_error("There is no symbol with index " + to_string(si));
230  }
231  if (qi >= p->labels.size()) {
232  throw std::out_of_range("There is no state with index " + to_string(qi));
233  }
234  return p->transitions[qi][si];
235 }
236 
238 
245 size_t dfa::delta(size_t qi, char32_t symbol) const {
246  try {
247  return delta(qi, index_of(p->alphabet, symbol));
248  } catch (std::domain_error e) {
249  throw std::domain_error(u8"δ is not defined for symbol " + converter.to_bytes(symbol));
250  }
251 }
252 
254 size_t dfa::delta(size_t qi, string const& utf8Symbol) const {
255  return delta(qi, converter.from_bytes(utf8Symbol)[0]);
256 }
257 
259 
266 string const& dfa::delta(string const& q, char32_t symbol) const {
267  try {
268  return getStates()[delta(index_of(getStates(), q), symbol)];
269  } catch (std::out_of_range e) {
270  throw std::out_of_range("There is no state named " + q);
271  }
272 }
273 
275 string const& dfa::delta(string const& q, string const& utf8Symbol) const {
276  return delta(q, converter.from_bytes(utf8Symbol)[0]);
277 }
278 
280 
287 size_t dfa::deltaHat(size_t qi, u32string const& word) const {
288  for (char32_t symbol : word) {
289  qi = delta(qi, symbol);
290  }
291  return qi;
292 }
293 
295 size_t dfa::deltaHat(size_t q, string const& utf8Word) const {
296  return deltaHat(q, converter.from_bytes(utf8Word));
297 }
298 
300 
307 string const& dfa::deltaHat(string const& q, u32string const& word) const {
308  return p->labels[deltaHat(index_of(getStates(), q), word)];
309 }
310 
312 string const& dfa::deltaHat(string const& q, string const& utf8Word) const {
313  return deltaHat(q, converter.from_bytes(utf8Word));
314 }
315 
317 
322  if (p->accepting[0]) {
323  return U"";
324  }
325  unordered_map<size_t, u32string> shortestWords(p->labels.size());
326  size_t oldSize = 0;
327  shortestWords.emplace(0, U"");
328  while (shortestWords.size() > oldSize) {
329  oldSize = shortestWords.size();
330  for (auto const& stateWord : shortestWords) {
331  for (auto symbol : p->alphabet) {
332  size_t reached = delta(stateWord.first, symbol);
333  u32string shWord = stateWord.second + symbol;
334  if (p->accepting[reached]) {
335  return shWord;
336  }
337  if (shortestWords.find(reached) == shortestWords.end()) {
338  shortestWords.emplace(reached, shWord);
339  }
340  }
341  }
342  }
343  throw std::logic_error("This DFA doesn't accept any words!");
344 }
345 
347 string dfa::getShortestUtf8Word() const {
348  return converter.to_bytes(getShortestWord());
349 }
350 
352 
357 string const& dfa::getLabelOf(size_t q) const {
358  return getStates().at(q);
359 }
360 
362 
365 string const& dfa::getInitialState() const {
366  return getStates()[0];
367 }
368 
370 
374  return p->labels;
375 }
376 
378 
382  return p->alphabet;
383 }
384 
386 
390  return p->utf8Alphabet;
391 }
392 
394 
397 size_t dfa::getNumberOfStates() const {
398  return getStates().size();
399 }
400 
402 
405 size_t dfa::getNumberOfSymbols() const {
406  return getAlphabet().size();
407 }
408 
410 
415 bool dfa::isAccepting(size_t qi) const {
416  if (qi >= p->labels.size()) {
417  throw std::out_of_range("There is no state with index " + to_string(qi));
418  }
419  return p->accepting[qi];
420 }
421 
423 
428 bool dfa::isAccepting(string const& q) const {
429  try {
430  return isAccepting(index_of(getStates(), q));
431  } catch (std::out_of_range e) {
432  throw std::out_of_range("There is no state named " + q);
433  }
434 }
435 
437 
443  return d.getShortestWord();
444 }
445 
447 string findShortestUtf8Word(dfa const& d) {
448  return d.getShortestUtf8Word();
449 }
450 
452 
459 dfa::builder dfa::unite(dfa const& d1, dfa const& d2) {
460  return builder(d1).unite(d2);
461 }
462 
464 
471 dfa::builder dfa::intersect(dfa const& d1, dfa const& d2) {
472  return builder(d1).intersect(d2);
473 }
474 
476 
483 dfa::builder dfa::subtract(dfa const& d1, dfa const& d2) {
484  builder b1(d1);
485  for (auto symbol : d2.getAlphabet()) {
486  b1.addSymbol(symbol);
487  }
488  return b1.complement().unite(d2).complement();
489 }
490 
492 
500  return dfa::builder(d).complement();
501 }
502 
505 
508  string initial;
513 
515  pImpl() = default;
517 
520  string& initial,
524  ) : initial(move(initial)),
525  alphabet(move(alphabet)),
527  transitions(move(transitions)) {}
528 
530  bool isTrashState(string const& q) const {
531  if (acceptingStates.count(q) != 0) {
532  return false;
533  }
534  auto const& from = transitions.at(q);
535  for (char32_t symbol : alphabet) {
536  auto via = from.find(symbol);
537  if (via != from.end() && (*via).second != q) {
538  return false;
539  }
540  }
541  return true;
542  }
543 
545  string const& generateNewState() {
546  size_t q(0);
547  string newState;
548  while (transitions.find(newState = "q" + to_string(q)) != transitions.end()) {
549  q++;
550  }
551  if (transitions.empty()) {
552  initial = newState;
553  }
554  return transitions.insert(std::make_pair(newState, unordered_map<char32_t, string>())).first->first;
555  }
556 
558 
559  void complete() {
560  bool trashUsed(false);
561  string const& trashState = [&]{
562  for (auto const& entry : this->transitions) {
563  if (this->isTrashState(entry.first)) {
564  trashUsed = true;
565  return entry.first;
566  }
567  }
568  return this->generateNewState();
569  }();
570  for (auto& from : transitions) {
571  for (char32_t symbol : alphabet) {
572  if (from.second.find(symbol) == from.second.end()) {
573  from.second[symbol] = trashState;
574  trashUsed |= (from.first != trashState);
575  }
576  }
577  }
578  if (!trashUsed) {
579  transitions.erase(trashState);
580  }
581  }
582 };
583 
586 
589  auto initial = dfa.getLabelOf(0);
590  auto alphabet = unordered_set<char32_t>(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
592  auto transitions = Dtransitionmap(dfa.getNumberOfStates());
593  p = make_unique<pImpl>(initial, alphabet, labels, transitions);
594  for (size_t q = 0; q < dfa.getNumberOfStates(); ++q) {
595  for (char32_t symbol : dfa.getAlphabet()) {
596  defineTransition(dfa.getLabelOf(q), dfa.getLabelOf(dfa.delta(q, symbol)), symbol);
597  }
598  setAccepting(dfa.getLabelOf(q), dfa.isAccepting(q));
599  }
600 }
601 
606  size_t stackSize(1);
607  auto initial = nfa.getLabelOf(stack.front());
608  auto alphabet = nfa.getAlphabet();
609  alphabet.erase(alphabet.begin());
610  unordered_set<char32_t> alphabetS(alphabet.begin(), alphabet.end());
611  unordered_set<string> acceptingStates;
612  Dtransitionmap transitions;
613  transitions[initial];
614  p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
615  while (stackSize != 0) {
616  valarray<bool> current(move(stack.front()));
617  stack.pop_front();
618  stackSize--;
619  for (char32_t symbol : p->alphabet) {
620  valarray<bool> next(nfa.deltaHat(current, u32string(1, symbol)));
621  defineTransition(nfa.getLabelOf(current), nfa.getLabelOf(next), symbol);
622  auto equalToNext = [&](valarray<bool> const& ref)->bool{return (next == ref).min() == true;};
623  if (!equalToNext(current) &&
624  none_of(stack.begin(), stack.end(), equalToNext) &&
625  none_of(done.begin(), done.end(), equalToNext)) {
626  stack.push_front(next);
627  stackSize++;
628  }
629  }
630  for (size_t qi(0); qi < current.size(); qi++) {
631  if (current[qi] && nfa.isAccepting(qi)) {
632  setAccepting(nfa.getLabelOf(current), true);
633  break;
634  }
635  }
636  done.insert(current);
637  }
638 }
639 
641 dfa::builder::builder(builder const& b) : p(new pImpl(*(b.p))) {}
642 
644 
645 dfa::builder::builder(builder&& b) : p(new pImpl) {p.swap(b.p);}
646 
649  if (this != &b) {
650  p.reset(new pImpl(*(b.p)));
651  }
652  return *this;
653 }
654 
656 
658  if (this != &b) {
659  p.reset(new pImpl);
660  p.swap(b.p);
661  }
662  return *this;
663 }
664 
665 dfa::builder::~builder() = default;
666 
668 
673  p->alphabet.insert(symbol);
674  return *this;
675 }
676 
678 dfa::builder& dfa::builder::addSymbol(string const& utf8Symbol) {
679  return addSymbol(converter.from_bytes(utf8Symbol)[0]);
680 }
681 
683 
688 dfa::builder& dfa::builder::setAccepting(string const& state, bool accept) {
689  if (p->transitions.empty()) {
690  p->initial = state;
691  }
692  p->transitions[state];
693  if (accept) {
694  p->acceptingStates.insert(state);
695  } else {
696  p->acceptingStates.erase(state);
697  }
698  return *this;
699 }
700 
702 
707  p->initial = state;
708  p->transitions[state];
709  return *this;
710 }
711 
713 
722 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, char32_t symbol) {
723  if (p->transitions.empty()) {
724  p->initial = from;
725  }
726  p->transitions[to];
727  p->transitions[from][symbol] = to;
728  addSymbol(symbol);
729  return *this;
730 }
731 
733 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, string const& utf8Symbol) {
734  return defineTransition(from, to, converter.from_bytes(utf8Symbol)[0]);
735 }
736 
738 
748  p->complete();
749  vector<vector<size_t>> tTable(p->transitions.size());
750  valarray<bool> accepting(false, p->transitions.size());
751  vector<char32_t> sortedAlphabet(p->alphabet.begin(), p->alphabet.end());
752  std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
753  vector<string> orderedStates;
754  orderedStates.reserve(p->transitions.size());
755  orderedStates.push_back(p->initial);
756  for (auto const& entry : p->transitions) {
757  if (entry.first != p->initial) {
758  orderedStates.push_back(entry.first);
759  }
760  }
761  for (size_t q(0); q < orderedStates.size(); q++) {
762  string const& qLabel(orderedStates[q]);
763  if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
764  accepting[q] = true;
765  }
766  for (size_t s(0); s < sortedAlphabet.size(); s++) {
767  tTable[q].push_back(index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
768  }
769  }
770  vector<valarray<bool>> equivalences(dfa::pImpl::indistinguishableStates(tTable, accepting));
771  for (size_t q(0); q < orderedStates.size(); q++) {
772  for (size_t first(0); first < equivalences[q].size(); first++) {
773  if (equivalences[q][first] && q > first) {
774  string const& superfluous(orderedStates[q]);
775  string const& remaining(orderedStates[first]);
776  p->acceptingStates.erase(superfluous);
777  p->transitions.erase(superfluous);
778  for (auto& from : p->transitions) {
779  for (auto& via : from.second) {
780  if (via.second == superfluous) {
781  via.second = remaining;
782  }
783  }
784  }
785  break;
786  }
787  }
788  }
789  return *this;
790 }
791 
793 
800  unordered_set<string> reachable(&(p->initial), &(p->initial)+1);
801  unordered_set<string> newReachable(reachable);
802  while (newReachable.size() > 0) {
803  unordered_set<string> oldReachable(move(newReachable));
804  newReachable.clear();
805  for (string const& reachableState : oldReachable) {
806  for (auto const& reachedState : p->transitions[reachableState]) {
807  if (reachable.insert(reachedState.second).second) {
808  newReachable.insert(reachedState.second);
809  }
810  }
811  }
812  }
813  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
814  if (reachable.find(it->first) == reachable.end()) {
815  p->acceptingStates.erase(it->first);
816  it = p->transitions.erase(it);
817  } else {
818  it++;
819  }
820  }
821  unordered_set<string> producing(p->acceptingStates);
822  unordered_set<string> newProducing(producing);
823  while (newProducing.size() > 0) {
824  unordered_set<string> oldProducing(move(newProducing));
825  newProducing.clear();
826  for (auto const& from : p->transitions) {
827  for (auto const& via : from.second) {
828  if (producing.find(via.second) != producing.end()) {
829  if (producing.insert(from.first).second) {
830  newProducing.insert(from.first);
831  }
832  break;
833  }
834  }
835  }
836  }
837  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
838  if (producing.find(it->first) == producing.end() && it->first != p->initial) {
839  p->acceptingStates.erase(it->first);
840  for (auto& via : p->transitions) {
841  for (auto itv = via.second.begin(); itv != via.second.end(); ) {
842  if (itv->second == it->first) {
843  itv = via.second.erase(itv);
844  } else {
845  itv++;
846  }
847  }
848  }
849  it = p->transitions.erase(it);
850  } else {
851  it++;
852  }
853  }
854  return *this;
855 }
856 
859  return purge().merge();
860 }
861 
863 
870  if (!p->transitions.empty()) {
871  auto const& oAlph = other.getAlphabet();
872  for (char32_t symbol : oAlph) {
873  addSymbol(symbol);
874  }
875  size_t trash = other.getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
876  p->complete();
877  vector<string> stateNames;
878  stateNames.reserve(p->transitions.size());
879  stateNames.push_back(p->initial);
880  for (auto const& fromVia : p->transitions) {
881  if (fromVia.first != p->initial) {
882  stateNames.push_back(fromVia.first);
883  }
884  }
885  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
886  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
887  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
888  for (size_t q = 0; q < stateNames.size(); q++) {
889  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
890  string potentialName = stateNames[q];
891  try {
892  potentialName.append(other.getLabelOf(qq));
893  } catch (std::out_of_range e) {
894  // We hit the trash state.
895  }
896  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
897  potentialName.append("_");
898  }
899  auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
900  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
901  try {
902  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.isAccepting(qq)) {
903  newAcc.insert(potentialName);
904  }
905  } catch (std::out_of_range e) {
906  // We hit the trash state AND q is not accepting.
907  }
908  }
909  }
910  for (size_t q = 0; q < stateNames.size(); q++) {
911  auto const& qLabel = stateNames[q];
912  auto const& viaTos = p->transitions[qLabel];
913  for (auto const& viaTo : viaTos) {
914  size_t p = index_of(stateNames, viaTo.second);
915  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
916  size_t pp;
917  try {
918  pp = other.delta(qq, viaTo.first);
919  } catch (std::logic_error e) {
920  pp = trash;
921  }
922  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
923  }
924  }
925  }
926  p->transitions = newTr;
927  p->acceptingStates = newAcc;
928  p->initial = *(pairToName[0][0]);
929  } else {
930  for (size_t q = 0; q < other.getNumberOfStates(); ++q) {
931  for (char32_t symbol : other.getAlphabet()) {
932  defineTransition(other.getLabelOf(q), other.getLabelOf(other.delta(q, symbol)), symbol);
933  }
934  setAccepting(other.getLabelOf(q), other.isAccepting(q));
935  }
936  }
937  return *this;
938 }
939 
941 
948  if (!p->transitions.empty()) {
949  vector<string> stateNames;
950  stateNames.reserve(p->transitions.size());
951  stateNames.push_back(p->initial);
952  for (auto const& fromTo : p->transitions) {
953  if (fromTo.first != p->initial) {
954  stateNames.push_back(fromTo.first);
955  }
956  }
957  auto const& oAlph = other.getAlphabet();
958  size_t commonSymbols = 0;
959  for (char32_t symbol : p->alphabet) {
960  if (index_of(oAlph, symbol) < oAlph.size()) {
961  commonSymbols++;
962  } else {
963  for (auto & fromVia : p->transitions) {
964  fromVia.second.erase(symbol);
965  }
966  }
967  }
968  p->alphabet.insert(oAlph.begin(), oAlph.end());
969  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
970  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
971  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
972  for (size_t q = 0; q < stateNames.size(); q++) {
973  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
974  string potentialName = stateNames[q] + other.getLabelOf(qq);
975  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
976  potentialName.append("_");
977  }
978  auto nameIt = newTr.emplace(potentialName, commonSymbols);
979  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
980  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.isAccepting(qq)) {
981  newAcc.insert(potentialName);
982  }
983  }
984  }
985  for (size_t q = 0; q < stateNames.size(); q++) {
986  auto const& qLabel = stateNames[q];
987  auto const& viaTos = p->transitions[qLabel];
988  for (auto const& viaTo : viaTos) {
989  size_t p = index_of(stateNames, viaTo.second);
990  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
991  size_t pp = other.delta(qq, viaTo.first);
992  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
993  }
994  }
995  }
996  p->transitions = newTr;
997  p->acceptingStates = newAcc;
998  p->initial = *(pairToName[0][0]);
999  }
1000  return *this;
1001 }
1002 
1005  p->complete();
1006  for (auto const& fromVia : p->transitions) {
1007  if (!p->acceptingStates.erase(fromVia.first)) {
1008  p->acceptingStates.insert(fromVia.first);
1009  }
1010  }
1011  return *this;
1012 }
1013 
1016  if (!p->transitions.empty()) {
1017  vector<string> stateNames;
1018  stateNames.reserve(p->transitions.size());
1019  stateNames.push_back(p->initial);
1020  for (auto const& fromTo : p->transitions) {
1021  if (fromTo.first != p->initial) {
1022  stateNames.push_back(fromTo.first);
1023  }
1024  }
1025  Dtransitionmap newTr(p->transitions.size());
1026  unordered_set<string> newAcc(p->acceptingStates.size());
1027  for (size_t q = 0; q < stateNames.size(); q++) {
1028  auto const& vias = p->transitions[stateNames[q]];
1029  unordered_map<char32_t,string> newVias(vias.size());
1030  for (auto const& viaTo : vias) {
1031  newVias.insert(make_pair(viaTo.first, prefix + to_string(index_of(stateNames, viaTo.second))));
1032  }
1033  newTr.insert(make_pair(prefix + to_string(q), newVias));
1034  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
1035  newAcc.insert(prefix + to_string(q));
1036  }
1037  }
1038  p->initial = prefix + "0";
1039  p->transitions = newTr;
1040  p->acceptingStates = newAcc;
1041  }
1042  return *this;
1043 }
1044 
1046 
1050  vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
1051  std::sort(alphabetV.begin(), alphabetV.end());
1052  p->complete();
1053  vector<string> labelV;
1054  labelV.reserve(p->transitions.size());
1055  labelV.push_back(p->initial);
1056  for (auto const& tr : p->transitions) {
1057  if (tr.first == p->initial) {
1058  continue;
1059  }
1060  labelV.push_back(tr.first);
1061  }
1062  valarray<bool> acceptingV(false, labelV.size());
1063  vector<vector<size_t>> transitionV(labelV.size(), vector<size_t>(alphabetV.size()));
1064  for (size_t q(0); q < labelV.size(); q++) {
1065  string const& qLabel = labelV[q];
1066  acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
1067  for (size_t s(0); s < alphabetV.size(); s++) {
1068  transitionV[q][s] = index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
1069  }
1070  }
1071  return {alphabetV, transitionV, labelV, acceptingV};
1072 }
1073 
1075 
1080 template<class T> size_t index_of(vector<T> const& vec, T const& element) {
1081  return static_cast<size_t>(std::distance(vec.begin(), find(vec.begin(), vec.end(), element)));
1082 }
1084 template size_t index_of(vector<char32_t> const& vec, char32_t const& element);
1086 template size_t index_of(vector<string> const& vec, string const& element);
1087 
1089 } // namespace reg::dfa
bool operator!=(const dfa &d) const
Tests whether this DFA doesn't accept the same language as another one.
Definition: dfa.cpp:217
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
Definition: dfa.cpp:499
std::string const & getInitialState() const
Names this DFA's initial state.
Definition: dfa.cpp:365
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
Definition: dfa.cpp:688
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:415
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA&#39;s language.
Definition: dfa.cpp:869
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
Definition: dfa.cpp:511
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
Definition: dfa.cpp:530
vector< char32_t > alphabet
Represents the set of processable symbols.
Definition: dfa.cpp:56
dfa()
Constructs a DFA accepting the empty language ∅.
Definition: dfa.cpp:126
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:25
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: dfa.cpp:55
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:466
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:57
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Definition: nfa.cpp:366
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Definition: dfa.cpp:459
Represents deterministic finite automata.
Definition: dfa.h:27
Private implementation details of DFAs.
Definition: dfa.cpp:54
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: dfa.cpp:1088
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
Definition: dfa.cpp:858
vector< string > labels
Stores the names of states.
Definition: dfa.cpp:58
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
Definition: dfa.cpp:59
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
Definition: dfa.cpp:799
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
Definition: dfa.cpp:509
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
Definition: dfa.cpp:227
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:381
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
Definition: dfa.cpp:65
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
Definition: dfa.cpp:83
T min(T... args)
dfa & operator=(const dfa &d)
Copy-assigns this DFA by copying another one's private implementation object.
Definition: dfa.cpp:149
size_t getNumberOfSymbols() const
Counts this DFA's alphabet symbols.
Definition: dfa.cpp:405
Private implementation details of DFA builders.
Definition: dfa.cpp:507
Contains the reg::nfa class definition.
size_t deltaHat(size_t qi, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: dfa.cpp:287
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Definition: dfa.cpp:483
Contains the reg::expression class defintion.
T hash(T... args)
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:500
size_t index_of(vector< T > const &vec, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: dfa.cpp:1080
bool operator==(const dfa &d) const
Tests whether this DFA accepts exactly the same language as another one.
Definition: dfa.cpp:173
builder & complement()
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: dfa.cpp:1004
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
Definition: dfa.cpp:559
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
Definition: dfa.cpp:373
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
Definition: dfa.cpp:706
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Definition: dfa.cpp:747
Constructs DFAs step by step.
Definition: dfa.h:70
size_t getNumberOfStates() const
Counts this DFA's states.
Definition: dfa.cpp:397
T operator()(T... args)
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA&#39;s state names to consecutive numbers, prefixed with a given string...
Definition: dfa.cpp:1015
string initial
Name of the prospective DFA&#39;s initial state.
Definition: dfa.cpp:508
builder()
Constructs a blank builder object.
Definition: dfa.cpp:585
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
Definition: dfa.cpp:471
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: nfa.cpp:165
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
Definition: dfa.cpp:545
Where this library lives.
Definition: dfa.cpp:51
unordered_set< string > acceptingStates
Set of names of the prospective DFA&#39;s accept states.
Definition: dfa.cpp:510
std::u32string getShortestWord() const
Searches the shortest UTF-32-encoded word accepted by this DFA.
Definition: dfa.cpp:321
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
Definition: dfa.cpp:947
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
Definition: dfa.cpp:648
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:447
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
Definition: dfa.cpp:62
std::string getShortestUtf8Word() const
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:347
pImpl()=default
Constructs empty private implementation object.
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:442
T operator()(T... args)
dfa build()
Builds the DFA, as defined by previous operations, including completion.
Definition: dfa.cpp:1049
std::string const & getLabelOf(size_t qi) const
Puts a name to an index.
Definition: dfa.cpp:357
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
Definition: nfa.cpp:287
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:389
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
Definition: dfa.cpp:722
Contains the reg::dfa class definition.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
Definition: dfa.cpp:519
unordered_map< string, unordered_map< char32_t, string > > Dtransitionmap
Shorthand for the map from state name and transition symbol to target state.
Definition: dfa.cpp:504
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
Definition: dfa.cpp:672