reglibcpp  1.3.0
(Naïve) C++ implementation of models for regular languages
dfa.cpp
Go to the documentation of this file.
1 #include "dfa.h"
3 using std::valarray;
4 using std::vector;
5 using std::string;
6 using std::make_unique;
7 using std::u32string;
8 
9 using std::move;
10 
11 #include <unordered_set>
12 using std::unordered_set;
13 
14 #include <unordered_map>
15 using std::unordered_map;
16 
17 #include <forward_list>
18 using std::forward_list;
19 
20 #include <algorithm>
21 using std::find;
22 using std::none_of;
23 
24 #include <cstdint>
25 
26 #include "nfa.h"
27 #include "expression.h"
28 
29 namespace reg {
30 
32 struct dfa::pImpl {
33  valarray<bool> accepting;
34  vector<char32_t> alphabet;
35  vector<string> utf8Alphabet;
36  vector<string> labels;
37  vector<vector<size_t>> transitions;
38 
40  pImpl() : accepting(false, 1), alphabet(0), utf8Alphabet(0), labels(1), transitions(0) {}
41 
43  pImpl(vector<char32_t>& alphabet, vector<vector<size_t>>& transitions,
44  vector<string>& labels, valarray<bool>& accepting) :
45  accepting(move(accepting)),
46  alphabet(move(alphabet)),
47  labels(move(labels)),
48  transitions(move(transitions)) {
49  utf8Alphabet.reserve(this->alphabet.size());
50  for (char32_t symbol : this->alphabet) {
51  utf8Alphabet.push_back(converter.to_bytes(symbol));
52  }
53  }
54 
56 
61  static vector<valarray<bool>> indistinguishableStates(
62  vector<vector<size_t>> const& transitions, valarray<bool> const& accepting) {
63  vector<valarray<bool>> distinguishable;
64  distinguishable.reserve(transitions.size());
65  for (size_t q(0); q < transitions.size(); q++) {
66  bool qAcc(accepting[q]);
67  distinguishable.push_back(valarray<bool>(false, transitions.size()));
68  for (size_t p(0); p < q; p++) {
69  bool pAcc(accepting[p]);
70  distinguishable[q][p] = (qAcc != pAcc);
71  distinguishable[p][q] = (qAcc != pAcc);
72  }
73  }
74  bool changes(true);
75  while (changes) {
76  changes = false;
77  for (size_t q(0); q < transitions.size(); q++) {
78  for (size_t p(0); p < q; p++) {
79  if (distinguishable[q][p]) {
80  continue;
81  }
82  for (size_t s(0); s < transitions[q].size(); s++) {
83  size_t qS(transitions[q][s]);
84  size_t pS(transitions[p][s]);
85  if (distinguishable[qS][pS]) {
86  changes = true;
87  distinguishable[q][p] = true;
88  distinguishable[p][q] = true;
89  break;
90  }
91  }
92  }
93  }
94  }
95  valarray<bool> allTrue(true, transitions.size());
96  for (size_t i(0); i < distinguishable.size(); i++) {
97  distinguishable[i] ^= allTrue;
98  }
99  return distinguishable;
100  }
101 };
102 
104 dfa::dfa() : p(new pImpl) {}
105 
107 dfa::dfa(vector<char32_t>& alphabet,
108  vector<vector<size_t>>& transitions,
109  vector<string>& labels,
110  valarray<bool>& acceptingStates) :
111  p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
112 
114 dfa::dfa(dfa const& d) : p(new pImpl(*(d.p))) {}
115 
117 
118 dfa::dfa(dfa&& d) : p(new pImpl) {p.swap(d.p);}
119 
121 dfa::dfa(nfa const& n) : dfa(builder(n).build()) {}
122 
124 dfa::dfa(dfa::builder& b) : dfa(b.build()) {}
125 
127 dfa& dfa::operator=(const dfa& d) {
128  if (this != &d) {
129  p.reset(new pImpl(*(d.p)));
130  }
131  return *this;
132 }
133 
135 
137  if (this != &d) {
138  p.reset(new pImpl);
139  p.swap(d.p);
140  }
141  return *this;
142 }
143 
144 dfa::~dfa() = default;
145 
147 
151 bool dfa::operator==(dfa const& d) const {
152  forward_list<char32_t> unitedAlphabet(p->alphabet.begin(), p->alphabet.end());
153  size_t unitedAlphabetSize(p->alphabet.size());
154  for (char32_t symbol : d.getAlphabet()) {
155  if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
156  unitedAlphabet.push_front(symbol);
157  unitedAlphabetSize++;
158  }
159  }
160  vector<vector<size_t>> tTable(getNumberOfStates()+d.getNumberOfStates()+1, vector<size_t>(unitedAlphabetSize));
161  valarray<bool> accepting(false, getNumberOfStates()+d.getNumberOfStates()+1);
162  for (size_t q(0); q < getNumberOfStates(); q++) {
163  size_t s(0);
164  vector<size_t>& row = tTable[q];
165  for (char32_t symbol : unitedAlphabet) {
166  try {
167  row[s] = delta(q, symbol);
168  } catch (std::domain_error e) {
169  row[s] = getNumberOfStates()+d.getNumberOfStates();
170  }
171  s++;
172  }
173  accepting[q] = isAccepting(q);
174  }
175  for (size_t q(0); q < d.getNumberOfStates(); q++) {
176  size_t s(0);
177  vector<size_t>& row = tTable[q+getNumberOfStates()];
178  for (char32_t symbol : unitedAlphabet) {
179  try {
180  row[s] = getNumberOfStates()+d.delta(q, symbol);
181  } catch (std::domain_error e) {
182  row[s] = getNumberOfStates()+d.getNumberOfStates();
183  }
184  s++;
185  }
186  accepting[q+getNumberOfStates()] = d.isAccepting(q);
187  }
188  for (size_t s(0); s < unitedAlphabetSize; s++) {
190  }
191  return pImpl::indistinguishableStates(tTable, accepting)[0][getNumberOfStates()];
192 }
193 
195 bool dfa::operator!=(dfa const& d) const {return !operator==(d);}
196 
198 
203 size_t dfa::delta(size_t q, char32_t symbol) const {
204  size_t i = index_of(p->alphabet, symbol);
205  if (i >= p->alphabet.size()) {
206  throw std::domain_error(
207  string(u8"δ is not defined for symbol ") + converter.to_bytes(symbol)
208  );
209  }
210  if (q >= p->labels.size()) {
211  throw std::out_of_range(
212  string("There is no state with index ").append(std::to_string(q))
213  );
214  }
215  return p->transitions[q][i];
216 }
217 
219 size_t dfa::delta(size_t q, string const& utf8Symbol) const {
220  return delta(q, converter.from_bytes(utf8Symbol)[0]);
221 }
222 
224 
229 size_t dfa::deltaHat(size_t q, std::u32string const& word) const {
230  for (char32_t symbol : word) {
231  q = delta(q, symbol);
232  }
233  return q;
234 }
235 
237 size_t dfa::deltaHat(size_t q, string const& utf8Word) const {
238  return deltaHat(q, converter.from_bytes(utf8Word));
239 }
240 
242 
254 u32string dfa::getShortestWord() const {
255  if (p->accepting[0]) {
256  return U"";
257  }
258  unordered_map<size_t, u32string> shortestWords(p->labels.size());
259  shortestWords.emplace(0, U"");
260  while (true) {
261  for (auto const& stateWord : shortestWords) {
262  for (auto symbol : p->alphabet) {
263  size_t reached = delta(stateWord.first, symbol);
264  u32string shWord = stateWord.second + symbol;
265  if (p->accepting[reached]) {
266  return shWord;
267  }
268  if (shortestWords.find(reached) == shortestWords.end()) {
269  shortestWords.emplace(reached, shWord);
270  }
271  }
272  }
273  }
274 }
275 
277 string dfa::getShortestUtf8Word() const {
278  return converter.to_bytes(getShortestWord());
279 }
280 
282 
286 string const& dfa::getLabelOf(size_t q) const {
287  return p->labels.at(q);
288 }
289 
291 
294 vector<char32_t> const& dfa::getAlphabet() const {
295  return p->alphabet;
296 }
297 
299 
302 vector<string> const& dfa::getUtf8Alphabet() const {
303  return p->utf8Alphabet;
304 }
305 
307 
310 size_t dfa::getNumberOfStates() const {
311  return p->labels.size();
312 }
313 
315 
319 bool dfa::isAccepting(size_t q) const {
320  if (q >= p->labels.size()) {
321  throw std::out_of_range(
322  string("There is no state with index ").append(std::to_string(q))
323  );
324  }
325  return p->accepting[q];
326 }
327 
329 
336 dfa::builder dfa::unite(dfa const& d1, dfa const& d2) {
337  return builder(d1).unite(d2);
338 }
339 
341 
348 dfa::builder dfa::intersect(dfa const& d1, dfa const& d2) {
349  return builder(d1).intersect(d2);
350 }
351 
353 
360 dfa::builder dfa::subtract(dfa const& d1, dfa const& d2) {
361  builder b1(d1);
362  for (auto symbol : d2.getAlphabet()) {
363  b1.addSymbol(symbol);
364  }
365  return b1.complement().unite(d2).complement();
366 }
367 
369 
377  return dfa::builder(d).complement();
378 }
379 
380 using Dtransitionmap = unordered_map<string, unordered_map<char32_t, string>>;
381 
384  string initial;
385  unordered_set<char32_t> alphabet;
386  unordered_set<string> acceptingStates;
387  Dtransitionmap transitions;
389 
391  pImpl() = default;
393 
396  string& initial,
397  unordered_set<char32_t>& alphabet,
398  unordered_set<string>& acceptingStates,
399  Dtransitionmap& transitions
400  ) : initial(move(initial)),
401  alphabet(move(alphabet)),
403  transitions(move(transitions)) {}
404 
406  bool isTrashState(string const& q) const {
407  if (acceptingStates.count(q) != 0) {
408  return false;
409  }
410  auto const& from = transitions.at(q);
411  for (char32_t symbol : alphabet) {
412  auto via = from.find(symbol);
413  if (via != from.end() && (*via).second != q) {
414  return false;
415  }
416  }
417  return true;
418  }
419 
421  string const& generateNewState() {
422  size_t q(0);
423  string newState;
424  while (transitions.find(newState = string("q").append(std::to_string(q))) != transitions.end()) {
425  q++;
426  }
427  if (transitions.empty()) {
428  initial = newState;
429  }
430  return transitions.insert(std::make_pair(newState, unordered_map<char32_t, string>())).first->first;
431  }
432 
434 
435  void complete() {
436  bool trashUsed(false);
437  string const& trashState = [&]{
438  for (auto const& entry : this->transitions) {
439  if (this->isTrashState(entry.first)) {
440  trashUsed = true;
441  return entry.first;
442  }
443  }
444  return this->generateNewState();
445  }();
446  for (auto& from : transitions) {
447  for (char32_t symbol : alphabet) {
448  if (from.second.find(symbol) == from.second.end()) {
449  from.second[symbol] = trashState;
450  trashUsed |= (from.first != trashState);
451  }
452  }
453  }
454  if (!trashUsed) {
455  transitions.erase(trashState);
456  }
457  }
458 
461  size_t operator()(valarray<bool> const& value) const {
462  size_t hash = 0;
463  for (bool v : value) {
464  hash <<= 1;
465  hash |= v;
466  }
467  return hash;
468  }
469  };
470 
472  static bool valarrayFullTrue(valarray<bool> const& a) {
473  bool fullTrue = true;
474  for (bool b : a) {
475  fullTrue &= b;
476  }
477  return fullTrue;
478  }
479 
482  valarray<bool> const& val;
483 
484  ValarrayBoolEqualTo(valarray<bool> const& val) : val(val) {}
485 
486  bool operator()(valarray<bool> const& other) const {
487  return (val.size() == other.size() && valarrayFullTrue(val == other));
488  }
489  };
490 
493  bool operator()(valarray<bool> const& a, valarray<bool> const& b) const {
494  return ValarrayBoolEqualTo(a)(b);
495  }
496  };
497 };
498 
501 
504  auto initial = dfa.getLabelOf(0);
505  auto alphabet = unordered_set<char32_t>(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
506  auto labels = unordered_set<string>(dfa.getNumberOfStates());
507  auto transitions = Dtransitionmap(dfa.getNumberOfStates());
508  p = make_unique<pImpl>(initial, alphabet, labels, transitions);
509  for (size_t q = 0; q < dfa.getNumberOfStates(); ++q) {
510  for (char32_t symbol : dfa.getAlphabet()) {
511  defineTransition(dfa.getLabelOf(q), dfa.getLabelOf(dfa.delta(q, symbol)), symbol);
512  }
513  setAccepting(dfa.getLabelOf(q), dfa.isAccepting(q));
514  }
515 }
516 
519  unordered_set<valarray<bool>, pImpl::ValarrayBoolHasher, pImpl::ValarrayBoolEqual> done;
520  forward_list<valarray<bool>> stack(1, nfa.epsilonClosure(0));
521  size_t stackSize(1);
522  auto initial = nfa.getLabelOf(stack.front());
523  auto alphabet = nfa.getAlphabet();
524  alphabet.erase(alphabet.begin());
525  unordered_set<char32_t> alphabetS(alphabet.begin(), alphabet.end());
526  unordered_set<string> acceptingStates;
527  Dtransitionmap transitions;
528  transitions[initial];
529  p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
530  while (stackSize != 0) {
531  valarray<bool> current(move(stack.front()));
532  stack.pop_front();
533  stackSize--;
534  for (char32_t symbol : p->alphabet) {
535  valarray<bool> next(nfa.deltaHat(current, u32string(1, symbol)));
536  defineTransition(nfa.getLabelOf(current), nfa.getLabelOf(next), symbol);
537  pImpl::ValarrayBoolEqualTo equalToNext(next);
538  if (!equalToNext(current) &&
539  none_of(stack.begin(), stack.end(), equalToNext) &&
540  none_of(done.begin(), done.end(), equalToNext)) {
541  stack.push_front(next);
542  stackSize++;
543  }
544  }
545  for (size_t qi(0); qi < current.size(); qi++) {
546  if (current[qi] && nfa.isAccepting(qi)) {
547  setAccepting(nfa.getLabelOf(current), true);
548  break;
549  }
550  }
551  done.insert(current);
552  }
553 }
554 
556 dfa::builder::builder(builder const& b) : p(new pImpl(*(b.p))) {}
557 
559 
560 dfa::builder::builder(builder&& b) : p(new pImpl) {p.swap(b.p);}
561 
564  if (this != &b) {
565  p.reset(new pImpl(*(b.p)));
566  }
567  return *this;
568 }
569 
571 
573  if (this != &b) {
574  p.reset(new pImpl);
575  p.swap(b.p);
576  }
577  return *this;
578 }
579 
580 dfa::builder::~builder() = default;
581 
583 
588  p->alphabet.insert(symbol);
589  return *this;
590 }
591 
593 dfa::builder& dfa::builder::addSymbol(string const& utf8Symbol) {
594  return addSymbol(converter.from_bytes(utf8Symbol)[0]);
595 }
596 
598 
603 dfa::builder& dfa::builder::setAccepting(string const& state, bool accept) {
604  if (p->transitions.empty()) {
605  p->initial = state;
606  }
607  p->transitions[state];
608  if (accept) {
609  p->acceptingStates.insert(state);
610  } else {
611  p->acceptingStates.erase(state);
612  }
613  return *this;
614 }
615 
617 
622  p->initial = state;
623  p->transitions[state];
624  return *this;
625 }
626 
628 
637 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, char32_t symbol) {
638  if (p->transitions.empty()) {
639  p->initial = from;
640  }
641  p->transitions[to];
642  p->transitions[from][symbol] = to;
643  addSymbol(symbol);
644  return *this;
645 }
646 
648 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, string const& utf8Symbol) {
649  return defineTransition(from, to, converter.from_bytes(utf8Symbol)[0]);
650 }
651 
653 
663  p->complete();
664  vector<vector<size_t>> tTable(p->transitions.size());
665  valarray<bool> accepting(false, p->transitions.size());
666  vector<char32_t> sortedAlphabet(p->alphabet.begin(), p->alphabet.end());
667  std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
668  vector<string> orderedStates;
669  orderedStates.reserve(p->transitions.size());
670  orderedStates.push_back(p->initial);
671  for (auto const& entry : p->transitions) {
672  if (entry.first != p->initial) {
673  orderedStates.push_back(entry.first);
674  }
675  }
676  for (size_t q(0); q < orderedStates.size(); q++) {
677  string const& qLabel(orderedStates[q]);
678  if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
679  accepting[q] = true;
680  }
681  for (size_t s(0); s < sortedAlphabet.size(); s++) {
682  tTable[q].push_back(index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
683  }
684  }
685  vector<valarray<bool>> equivalences(dfa::pImpl::indistinguishableStates(tTable, accepting));
686  for (size_t q(0); q < orderedStates.size(); q++) {
687  for (size_t first(0); first < equivalences[q].size(); first++) {
688  if (equivalences[q][first] && q > first) {
689  string const& superfluous(orderedStates[q]);
690  string const& remaining(orderedStates[first]);
691  p->acceptingStates.erase(superfluous);
692  p->transitions.erase(superfluous);
693  for (auto& from : p->transitions) {
694  for (auto& via : from.second) {
695  if (via.second == superfluous) {
696  via.second = remaining;
697  }
698  }
699  }
700  break;
701  }
702  }
703  }
704  return *this;
705 }
706 
708 
715  unordered_set<string> reachable(&(p->initial), &(p->initial)+1);
716  unordered_set<string> newReachable(reachable);
717  while (newReachable.size() > 0) {
718  unordered_set<string> oldReachable(move(newReachable));
719  newReachable.clear();
720  for (string const& reachableState : oldReachable) {
721  for (auto const& reachedState : p->transitions[reachableState]) {
722  if (reachable.insert(reachedState.second).second) {
723  newReachable.insert(reachedState.second);
724  }
725  }
726  }
727  }
728  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
729  if (reachable.find(it->first) == reachable.end()) {
730  p->acceptingStates.erase(it->first);
731  it = p->transitions.erase(it);
732  } else {
733  it++;
734  }
735  }
736  unordered_set<string> producing(p->acceptingStates);
737  unordered_set<string> newProducing(producing);
738  while (newProducing.size() > 0) {
739  unordered_set<string> oldProducing(move(newProducing));
740  newProducing.clear();
741  for (auto const& from : p->transitions) {
742  for (auto const& via : from.second) {
743  if (producing.find(via.second) != producing.end()) {
744  if (producing.insert(from.first).second) {
745  newProducing.insert(from.first);
746  }
747  break;
748  }
749  }
750  }
751  }
752  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
753  if (producing.find(it->first) == producing.end() && it->first != p->initial) {
754  p->acceptingStates.erase(it->first);
755  for (auto& via : p->transitions) {
756  for (auto itv = via.second.begin(); itv != via.second.end(); ) {
757  if (itv->second == it->first) {
758  itv = via.second.erase(itv);
759  } else {
760  itv++;
761  }
762  }
763  }
764  it = p->transitions.erase(it);
765  } else {
766  it++;
767  }
768  }
769  return *this;
770 }
771 
774  return purge().merge();
775 }
776 
778 
785  if (!p->transitions.empty()) {
786  auto const& oAlph = other.getAlphabet();
787  for (char32_t symbol : oAlph) {
788  addSymbol(symbol);
789  }
790  size_t trash = other.getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
791  p->complete();
792  vector<string> stateNames;
793  stateNames.reserve(p->transitions.size());
794  stateNames.push_back(p->initial);
795  for (auto const& fromVia : p->transitions) {
796  if (fromVia.first != p->initial) {
797  stateNames.push_back(fromVia.first);
798  }
799  }
800  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
801  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
802  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
803  for (size_t q = 0; q < stateNames.size(); q++) {
804  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
805  string potentialName = stateNames[q];
806  try {
807  potentialName.append(other.getLabelOf(qq));
808  } catch (std::out_of_range e) {
809  // We hit the trash state.
810  }
811  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
812  potentialName.append("_");
813  }
814  auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
815  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
816  try {
817  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.isAccepting(qq)) {
818  newAcc.insert(potentialName);
819  }
820  } catch (std::out_of_range e) {
821  // We hit the trash state AND q is not accepting.
822  }
823  }
824  }
825  for (size_t q = 0; q < stateNames.size(); q++) {
826  auto const& qLabel = stateNames[q];
827  auto const& viaTos = p->transitions[qLabel];
828  for (auto const& viaTo : viaTos) {
829  size_t p = index_of(stateNames, viaTo.second);
830  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
831  size_t pp;
832  try {
833  pp = other.delta(qq, viaTo.first);
834  } catch (std::logic_error e) {
835  pp = trash;
836  }
837  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
838  }
839  }
840  }
841  p->transitions = newTr;
842  p->acceptingStates = newAcc;
843  p->initial = *(pairToName[0][0]);
844  } else {
845  for (size_t q = 0; q < other.getNumberOfStates(); ++q) {
846  for (char32_t symbol : other.getAlphabet()) {
847  defineTransition(other.getLabelOf(q), other.getLabelOf(other.delta(q, symbol)), symbol);
848  }
849  setAccepting(other.getLabelOf(q), other.isAccepting(q));
850  }
851  }
852  return *this;
853 }
854 
856 
863  if (!p->transitions.empty()) {
864  vector<string> stateNames;
865  stateNames.reserve(p->transitions.size());
866  stateNames.push_back(p->initial);
867  for (auto const& fromTo : p->transitions) {
868  if (fromTo.first != p->initial) {
869  stateNames.push_back(fromTo.first);
870  }
871  }
872  auto const& oAlph = other.getAlphabet();
873  size_t commonSymbols = 0;
874  for (char32_t symbol : p->alphabet) {
875  if (index_of(oAlph, symbol) < oAlph.size()) {
876  commonSymbols++;
877  } else {
878  for (auto & fromVia : p->transitions) {
879  fromVia.second.erase(symbol);
880  }
881  }
882  }
883  p->alphabet.insert(oAlph.begin(), oAlph.end());
884  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
885  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
886  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
887  for (size_t q = 0; q < stateNames.size(); q++) {
888  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
889  string potentialName = stateNames[q] + other.getLabelOf(qq);
890  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
891  potentialName.append("_");
892  }
893  auto nameIt = newTr.emplace(potentialName, commonSymbols);
894  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
895  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.isAccepting(qq)) {
896  newAcc.insert(potentialName);
897  }
898  }
899  }
900  for (size_t q = 0; q < stateNames.size(); q++) {
901  auto const& qLabel = stateNames[q];
902  auto const& viaTos = p->transitions[qLabel];
903  for (auto const& viaTo : viaTos) {
904  size_t p = index_of(stateNames, viaTo.second);
905  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
906  size_t pp = other.delta(qq, viaTo.first);
907  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
908  }
909  }
910  }
911  p->transitions = newTr;
912  p->acceptingStates = newAcc;
913  p->initial = *(pairToName[0][0]);
914  }
915  return *this;
916 }
917 
920  p->complete();
921  for (auto const& fromVia : p->transitions) {
922  if (!p->acceptingStates.erase(fromVia.first)) {
923  p->acceptingStates.insert(fromVia.first);
924  }
925  }
926  return *this;
927 }
928 
930 dfa::builder& dfa::builder::normalizeStateNames(std::string const& prefix) {
931  if (!p->transitions.empty()) {
932  vector<string> stateNames;
933  stateNames.reserve(p->transitions.size());
934  stateNames.push_back(p->initial);
935  for (auto const& fromTo : p->transitions) {
936  if (fromTo.first != p->initial) {
937  stateNames.push_back(fromTo.first);
938  }
939  }
940  Dtransitionmap newTr(p->transitions.size());
941  unordered_set<string> newAcc(p->acceptingStates.size());
942  for (size_t q = 0; q < stateNames.size(); q++) {
943  auto const& vias = p->transitions[stateNames[q]];
944  unordered_map<char32_t,string> newVias(vias.size());
945  for (auto const& viaTo : vias) {
946  newVias.insert(make_pair(viaTo.first, string(prefix).append(std::to_string(index_of(stateNames, viaTo.second)))));
947  }
948  newTr.insert(make_pair(string(prefix).append(std::to_string(q)), newVias));
949  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
950  newAcc.insert(string(prefix).append(std::to_string(q)));
951  }
952  }
953  p->initial = string(string(prefix).append("0"));
954  p->transitions = newTr;
955  p->acceptingStates = newAcc;
956  }
957  return *this;
958 }
959 
961 
965  vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
966  std::sort(alphabetV.begin(), alphabetV.end());
967  p->complete();
968  vector<string> labelV;
969  labelV.reserve(p->transitions.size());
970  labelV.push_back(p->initial);
971  for (auto const& tr : p->transitions) {
972  if (tr.first == p->initial) {
973  continue;
974  }
975  labelV.push_back(tr.first);
976  }
977  valarray<bool> acceptingV(false, labelV.size());
978  vector<vector<size_t>> transitionV(labelV.size(), vector<size_t>(alphabetV.size()));
979  for (size_t q(0); q < labelV.size(); q++) {
980  string const& qLabel = labelV[q];
981  acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
982  for (size_t s(0); s < alphabetV.size(); s++) {
983  transitionV[q][s] = index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
984  }
985  }
986  return {alphabetV, transitionV, labelV, acceptingV};
987 }
988 
990 
995 template<class T> size_t index_of(vector<T> const& vec, T const& element) {
996  return static_cast<size_t>(std::distance(vec.begin(), find(vec.begin(), vec.end(), element)));
997 }
999 template size_t index_of(vector<char32_t> const& vec, char32_t const& element);
1000 
1001 std::wstring_convert<std::codecvt_utf8<char32_t>,char32_t> converter;
1002 } // namespace reg::dfa
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:319
bool operator!=(const dfa &d) const
Tests whether this DFA doesn't accept the same language as another one.
Definition: dfa.cpp:195
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
Definition: dfa.cpp:376
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
Definition: dfa.cpp:603
Comparison object for valarray<bool>s against a specific instance.
Definition: dfa.cpp:481
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA&#39;s language.
Definition: dfa.cpp:784
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
Definition: dfa.cpp:387
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
Definition: dfa.cpp:406
size_t delta(size_t q, char32_t symbol) const
Computes this DFA's transition function for a state and a symbol.
Definition: dfa.cpp:203
vector< char32_t > alphabet
Represents the set of processable symbols.
Definition: dfa.cpp:34
dfa()
Constructs a DFA accepting the empty language ∅.
Definition: dfa.cpp:104
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:22
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: dfa.cpp:33
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:298
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:35
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Definition: nfa.cpp:268
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Definition: dfa.cpp:336
Comparison object for valarray<bool>s, so they can be used as unordered_set keys. ...
Definition: dfa.cpp:492
Represents deterministic finite automata.
Definition: dfa.h:26
Private implementation details of DFAs.
Definition: dfa.cpp:32
std::valarray< bool > deltaHat(size_t q, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a specified ...
Definition: nfa.cpp:132
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
Definition: dfa.cpp:773
vector< string > labels
Stores the names of states.
Definition: dfa.cpp:36
Hasher object for valarray<bool>s, so they can be used as unordered_set keys.
Definition: dfa.cpp:460
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
Definition: dfa.cpp:37
bool isAccepting(size_t q) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:323
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
Definition: dfa.cpp:714
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
Definition: dfa.cpp:385
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:294
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
Definition: dfa.cpp:43
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
Definition: dfa.cpp:61
dfa & operator=(const dfa &d)
Copy-assigns this DFA by copying another one's private implementation object.
Definition: dfa.cpp:127
Private implementation details of DFA builders.
Definition: dfa.cpp:383
Contains the reg::nfa class definition.
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Definition: dfa.cpp:360
Contains the reg::expression class defintion.
static bool valarrayFullTrue(valarray< bool > const &a)
Tests whether all of a valarray<bool>'s entries are true.
Definition: dfa.cpp:472
bool operator==(const dfa &d) const
Tests whether this DFA accepts exactly the same language as another one.
Definition: dfa.cpp:151
builder & complement()
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: dfa.cpp:919
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
Definition: dfa.cpp:435
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
Definition: dfa.cpp:621
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Definition: dfa.cpp:662
Constructs DFAs step by step.
Definition: dfa.h:60
size_t deltaHat(size_t q, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a specified ...
Definition: dfa.cpp:229
size_t getNumberOfStates() const
Counts this DFA's states.
Definition: dfa.cpp:310
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA&#39;s state names to consecutive numbers, prefixed with a given string...
Definition: dfa.cpp:930
string initial
Name of the prospective DFA&#39;s initial state.
Definition: dfa.cpp:384
builder()
Constructs a blank builder object.
Definition: dfa.cpp:500
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
Definition: dfa.cpp:348
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
Definition: dfa.cpp:421
Definition: dfa.cpp:29
unordered_set< string > acceptingStates
Set of names of the prospective DFA&#39;s accept states.
Definition: dfa.cpp:386
size_t index_of(vector< T > const &vec, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: dfa.cpp:995
std::u32string getShortestWord() const
Searches the shortest UTF-32-encoded word accepted by this DFA.
Definition: dfa.cpp:254
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
Definition: dfa.cpp:862
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
Definition: dfa.cpp:563
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
Definition: dfa.cpp:40
std::string getShortestUtf8Word() const
Same as above for a UTF-8-encoded word, INCLUDING POTENTIAL INFINITE LOOP.
Definition: dfa.cpp:277
pImpl()=default
Constructs empty private implementation object.
std::string const & getLabelOf(size_t q) const
Puts a name to an index.
Definition: dfa.cpp:286
dfa build()
Builds the DFA, as defined by previous operations, including completion.
Definition: dfa.cpp:964
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:302
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
Definition: dfa.cpp:637
std::valarray< bool > const & epsilonClosure(size_t q) const
Computes a state's ε-closure.
Definition: nfa.cpp:217
Contains the reg::dfa class definition.
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: dfa.cpp:1001
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
Definition: dfa.cpp:395
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
Definition: dfa.cpp:587