reglibcpp  1.7.0
(Naïve) C++ implementation of models for regular languages
dfa.cpp
Go to the documentation of this file.
1 #include "dfa.h"
3 using std::valarray;
4 using std::vector;
5 using std::string;
6 using std::to_string;
7 using std::make_unique;
8 using std::u32string;
9 
10 using std::move;
11 
12 #include <unordered_set>
13 using std::unordered_set;
14 
15 #include <unordered_map>
16 using std::unordered_map;
17 
18 #include <forward_list>
19 using std::forward_list;
20 
21 #include <algorithm>
22 using std::find;
23 using std::none_of;
24 
25 #include <cstdint>
26 
27 #include "nfa.h"
28 #include "utils.h"
29 
30 namespace std {
31  template<>
32  struct hash<valarray<bool>> {
33  size_t operator()(valarray<bool> const& value) const {
34  size_t hash = 0;
35  for (bool v : value) {
36  hash <<= 1;
37  hash |= v;
38  }
39  return hash;
40  }
41  };
42 
43  template<>
44  struct equal_to<valarray<bool>> {
45  bool operator()(valarray<bool> const& lhs, valarray<bool> const& rhs) const {
46  return (lhs.size() == rhs.size() && (lhs == rhs).min() == true);
47  }
48  };
49 }
50 
51 namespace reg {
52 
54 struct dfa::pImpl {
61 
63  pImpl() : accepting(false, 1), alphabet(0), utf8Alphabet(0), labels(1), transitions(1) {}
64 
68  accepting(move(accepting)),
69  alphabet(move(alphabet)),
70  labels(move(labels)),
71  transitions(move(transitions)) {
72  utf8Alphabet.reserve(this->alphabet.size());
73  for (char32_t symbol : this->alphabet) {
74  utf8Alphabet.push_back(converter.to_bytes(symbol));
75  }
76  }
77 
79 
86  vector<valarray<bool>> distinguishable;
87  distinguishable.reserve(transitions.size());
88  for (size_t q(0); q < transitions.size(); q++) {
89  bool qAcc(accepting[q]);
90  distinguishable.push_back(valarray<bool>(false, transitions.size()));
91  for (size_t p(0); p < q; p++) {
92  bool pAcc(accepting[p]);
93  distinguishable[q][p] = (qAcc != pAcc);
94  distinguishable[p][q] = (qAcc != pAcc);
95  }
96  }
97  bool changes(true);
98  while (changes) {
99  changes = false;
100  for (size_t q(0); q < transitions.size(); q++) {
101  for (size_t p(0); p < q; p++) {
102  if (distinguishable[q][p]) {
103  continue;
104  }
105  for (size_t s(0); s < transitions[q].size(); s++) {
106  size_t qS(transitions[q][s]);
107  size_t pS(transitions[p][s]);
108  if (distinguishable[qS][pS]) {
109  changes = true;
110  distinguishable[q][p] = true;
111  distinguishable[p][q] = true;
112  break;
113  }
114  }
115  }
116  }
117  }
118  valarray<bool> allTrue(true, transitions.size());
119  for (size_t i(0); i < distinguishable.size(); i++) {
120  distinguishable[i] ^= allTrue;
121  }
122  return distinguishable;
123  }
124 };
125 
127 dfa::dfa() : p(new pImpl) {}
128 
130 dfa::dfa(vector<char32_t>& alphabet,
131  vector<vector<size_t>>& transitions,
132  vector<string>& labels,
133  valarray<bool>& acceptingStates) :
134  p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
135 
137 dfa::dfa(dfa const& d) : p(new pImpl(*(d.p))) {}
138 
140 
141 dfa::dfa(dfa&& d) : p(new pImpl) {p.swap(d.p);}
142 
144 dfa& dfa::operator=(dfa const& d) {
145  if (this != &d) {
146  p.reset(new pImpl(*(d.p)));
147  }
148  return *this;
149 }
150 
152 
154  if (this != &d) {
155  p.reset(new pImpl);
156  p.swap(d.p);
157  }
158  return *this;
159 }
160 
161 dfa::~dfa() = default;
162 
163 #ifndef DOXYGEN_SHOULD_SKIP_THIS
164 dfa::operator nfa const&() const {
165  if (!p->equivalent) {
166  p->equivalent = std::make_shared<nfa const>(nfa::builder(*this).build());
167  }
168  return *p->equivalent;
169 }
170 #endif // DOXYGEN_SHOULD_SKIP_THIS
171 
173 
177 bool dfa::operator==(dfa const& d) const {
178  forward_list<char32_t> unitedAlphabet(p->alphabet.begin(), p->alphabet.end());
179  size_t unitedAlphabetSize(p->alphabet.size());
180  for (char32_t symbol : d.getAlphabet()) {
181  if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
182  unitedAlphabet.push_front(symbol);
183  unitedAlphabetSize++;
184  }
185  }
186  vector<vector<size_t>> tTable(getNumberOfStates()+d.getNumberOfStates()+1, vector<size_t>(unitedAlphabetSize));
187  valarray<bool> accepting(false, getNumberOfStates()+d.getNumberOfStates()+1);
188  for (size_t q(0); q < getNumberOfStates(); q++) {
189  size_t s(0);
190  vector<size_t>& row = tTable[q];
191  for (char32_t symbol : unitedAlphabet) {
192  try {
193  row[s] = delta(q, symbol);
194  } catch (std::domain_error e) {
195  row[s] = getNumberOfStates()+d.getNumberOfStates();
196  }
197  s++;
198  }
199  accepting[q] = isAccepting(q);
200  }
201  for (size_t q(0); q < d.getNumberOfStates(); q++) {
202  size_t s(0);
203  vector<size_t>& row = tTable[q+getNumberOfStates()];
204  for (char32_t symbol : unitedAlphabet) {
205  try {
206  row[s] = getNumberOfStates()+d.delta(q, symbol);
207  } catch (std::domain_error e) {
208  row[s] = getNumberOfStates()+d.getNumberOfStates();
209  }
210  s++;
211  }
212  accepting[q+getNumberOfStates()] = d.isAccepting(q);
213  }
214  for (size_t s(0); s < unitedAlphabetSize; s++) {
216  }
217  return pImpl::indistinguishableStates(tTable, accepting)[0][getNumberOfStates()];
218 }
219 
221 bool dfa::operator!=(dfa const& d) const {return !operator==(d);}
222 
224 bool dfa::operator==(nfa const& n) const {
225  return operator==(static_cast<dfa const&>(n));
226 }
227 
229 bool dfa::operator!=(nfa const& n) const {return !operator==(n);}
230 
232 
239 size_t dfa::delta(size_t qi, size_t si) const {
240  if (si >= p->alphabet.size()) {
241  throw std::domain_error("There is no symbol with index " + to_string(si));
242  }
243  if (qi >= p->labels.size()) {
244  throw std::out_of_range("There is no state with index " + to_string(qi));
245  }
246  return p->transitions[qi][si];
247 }
248 
250 
257 size_t dfa::delta(size_t qi, char32_t symbol) const {
258  try {
259  return delta(qi, index_of(p->alphabet, symbol));
260  } catch (std::domain_error e) {
261  throw std::domain_error(u8"δ is not defined for symbol " + converter.to_bytes(symbol));
262  }
263 }
264 
266 size_t dfa::delta(size_t qi, string const& utf8Symbol) const {
267  return delta(qi, converter.from_bytes(utf8Symbol)[0]);
268 }
269 
271 
278 string const& dfa::delta(string const& q, char32_t symbol) const {
279  try {
280  return getStates()[delta(index_of(getStates(), q), symbol)];
281  } catch (std::out_of_range e) {
282  throw std::out_of_range("There is no state named " + q);
283  }
284 }
285 
287 string const& dfa::delta(string const& q, string const& utf8Symbol) const {
288  return delta(q, converter.from_bytes(utf8Symbol)[0]);
289 }
290 
292 
299 size_t dfa::deltaHat(size_t qi, u32string const& word) const {
300  for (char32_t symbol : word) {
301  qi = delta(qi, symbol);
302  }
303  return qi;
304 }
305 
307 size_t dfa::deltaHat(size_t q, string const& utf8Word) const {
308  return deltaHat(q, converter.from_bytes(utf8Word));
309 }
310 
312 
319 string const& dfa::deltaHat(string const& q, u32string const& word) const {
320  return p->labels[deltaHat(index_of(getStates(), q), word)];
321 }
322 
324 string const& dfa::deltaHat(string const& q, string const& utf8Word) const {
325  return deltaHat(q, converter.from_bytes(utf8Word));
326 }
327 
330  return findShortestWord(*this);
331 }
332 
334 string dfa::getShortestUtf8Word() const {
335  return findShortestUtf8Word(*this);
336 }
337 
339 string const& dfa::getLabelOf(size_t q) const {
340  return getStates().at(q);
341 }
342 
344 
347 string const& dfa::getInitialState() const {
348  return getStates()[0];
349 }
350 
352 
356  return p->labels;
357 }
358 
360 
364  return p->alphabet;
365 }
366 
368 
372  return p->utf8Alphabet;
373 }
374 
376 size_t dfa::getNumberOfStates() const {
377  return getStates().size();
378 }
379 
381 size_t dfa::getNumberOfSymbols() const {
382  return getAlphabet().size();
383 }
384 
386 
391 bool dfa::isAccepting(size_t qi) const {
392  if (qi >= p->labels.size()) {
393  throw std::out_of_range("There is no state with index " + to_string(qi));
394  }
395  return p->accepting[qi];
396 }
397 
399 
404 bool dfa::isAccepting(string const& q) const {
405  try {
406  return isAccepting(index_of(getStates(), q));
407  } catch (std::out_of_range e) {
408  throw std::out_of_range("There is no state named " + q);
409  }
410 }
411 
413 
419  auto const& p = d.p;
420  if (p->accepting[0]) {
421  return U"";
422  }
423  unordered_map<size_t, u32string> shortestWords(p->labels.size());
424  size_t oldSize = 0;
425  shortestWords.emplace(0, U"");
426  while (shortestWords.size() > oldSize) {
427  oldSize = shortestWords.size();
428  for (auto const& stateWord : shortestWords) {
429  for (auto symbol : p->alphabet) {
430  size_t reached = d.delta(stateWord.first, symbol);
431  u32string shWord = stateWord.second + symbol;
432  if (p->accepting[reached]) {
433  return shWord;
434  }
435  if (shortestWords.find(reached) == shortestWords.end()) {
436  shortestWords.emplace(reached, shWord);
437  }
438  }
439  }
440  }
441  throw std::logic_error("This DFA doesn't accept any words!");
442 }
443 
445 string findShortestUtf8Word(dfa const& d) {
446  return converter.to_bytes(findShortestWord(d));
447 }
448 
450 
457 dfa::builder dfa::unite(dfa const& d1, dfa const& d2) {
458  return builder(d1).unite(d2);
459 }
460 
462 
469 dfa::builder dfa::intersect(dfa const& d1, dfa const& d2) {
470  return builder(d1).intersect(d2);
471 }
472 
474 
481 dfa::builder dfa::subtract(dfa const& d1, dfa const& d2) {
482  builder b1(d1);
483  for (auto symbol : d2.getAlphabet()) {
484  b1.addSymbol(symbol);
485  }
486  return b1.complement().unite(d2).complement();
487 }
488 
490 
498  return dfa::builder(d).complement();
499 }
500 
503 
506  string initial;
511 
513  pImpl() = default;
515 
518  string& initial,
522  ) : initial(move(initial)),
523  alphabet(move(alphabet)),
525  transitions(move(transitions)) {}
526 
528  bool isTrashState(string const& q) const {
529  if (acceptingStates.count(q) != 0) {
530  return false;
531  }
532  auto const& from = transitions.at(q);
533  for (char32_t symbol : alphabet) {
534  auto via = from.find(symbol);
535  if (via != from.end() && (*via).second != q) {
536  return false;
537  }
538  }
539  return true;
540  }
541 
543  string const& generateNewState() {
544  size_t q(0);
545  string newState;
546  while (transitions.find(newState = "q" + to_string(q)) != transitions.end()) {
547  q++;
548  }
549  if (transitions.empty()) {
550  initial = newState;
551  }
552  return transitions.insert(std::make_pair(newState, unordered_map<char32_t, string>())).first->first;
553  }
554 
556 
557  void complete() {
558  bool trashUsed(false);
559  string const& trashState = [&]{
560  for (auto const& entry : this->transitions) {
561  if (this->isTrashState(entry.first)) {
562  trashUsed = true;
563  return entry.first;
564  }
565  }
566  return this->generateNewState();
567  }();
568  for (auto& from : transitions) {
569  for (char32_t symbol : alphabet) {
570  if (from.second.find(symbol) == from.second.end()) {
571  from.second[symbol] = trashState;
572  trashUsed |= (from.first != trashState);
573  }
574  }
575  }
576  if (!trashUsed) {
577  transitions.erase(trashState);
578  }
579  }
580 };
581 
584 
587  auto initial = dfa.getLabelOf(0);
588  auto alphabet = unordered_set<char32_t>(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
590  auto transitions = Dtransitionmap(dfa.getNumberOfStates());
591  p = make_unique<pImpl>(initial, alphabet, labels, transitions);
592  for (size_t q = 0; q < dfa.getNumberOfStates(); ++q) {
593  for (char32_t symbol : dfa.getAlphabet()) {
594  defineTransition(dfa.getLabelOf(q), dfa.getLabelOf(dfa.delta(q, symbol)), symbol);
595  }
596  setAccepting(dfa.getLabelOf(q), dfa.isAccepting(q));
597  }
598 }
599 
604  size_t stackSize(1);
605  auto initial = nfa.getLabelOf(stack.front());
606  auto alphabet = nfa.getAlphabet();
607  alphabet.erase(alphabet.begin());
608  unordered_set<char32_t> alphabetS(alphabet.begin(), alphabet.end());
609  unordered_set<string> acceptingStates;
610  Dtransitionmap transitions;
611  transitions[initial];
612  p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
613  while (stackSize != 0) {
614  valarray<bool> current(move(stack.front()));
615  stack.pop_front();
616  stackSize--;
617  for (char32_t symbol : p->alphabet) {
618  valarray<bool> next(nfa.deltaHat(current, u32string(1, symbol)));
619  defineTransition(nfa.getLabelOf(current), nfa.getLabelOf(next), symbol);
620  auto equalToNext = [&](valarray<bool> const& ref)->bool{return (next == ref).min() == true;};
621  if (!equalToNext(current) &&
622  none_of(stack.begin(), stack.end(), equalToNext) &&
623  none_of(done.begin(), done.end(), equalToNext)) {
624  stack.push_front(next);
625  stackSize++;
626  }
627  }
628  for (size_t qi(0); qi < current.size(); qi++) {
629  if (current[qi] && nfa.isAccepting(qi)) {
630  setAccepting(nfa.getLabelOf(current), true);
631  break;
632  }
633  }
634  done.insert(current);
635  }
636 }
637 
639 dfa::builder::builder(builder const& b) : p(new pImpl(*(b.p))) {}
640 
642 
643 dfa::builder::builder(builder&& b) : p(new pImpl) {p.swap(b.p);}
644 
647  if (this != &b) {
648  p.reset(new pImpl(*(b.p)));
649  }
650  return *this;
651 }
652 
654 
656  if (this != &b) {
657  p.reset(new pImpl);
658  p.swap(b.p);
659  }
660  return *this;
661 }
662 
663 dfa::builder::~builder() = default;
664 
666 dfa::builder::operator dfa() {
667  return build();
668 }
669 
671 
676  p->alphabet.insert(symbol);
677  return *this;
678 }
679 
681 dfa::builder& dfa::builder::addSymbol(string const& utf8Symbol) {
682  return addSymbol(converter.from_bytes(utf8Symbol)[0]);
683 }
684 
686 
691 dfa::builder& dfa::builder::setAccepting(string const& state, bool accept) {
692  if (p->transitions.empty()) {
693  p->initial = state;
694  }
695  p->transitions[state];
696  if (accept) {
697  p->acceptingStates.insert(state);
698  } else {
699  p->acceptingStates.erase(state);
700  }
701  return *this;
702 }
703 
705 
710  p->initial = state;
711  p->transitions[state];
712  return *this;
713 }
714 
716 
725 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, char32_t symbol) {
726  if (p->transitions.empty()) {
727  p->initial = from;
728  }
729  p->transitions[to];
730  p->transitions[from][symbol] = to;
731  addSymbol(symbol);
732  return *this;
733 }
734 
736 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, string const& utf8Symbol) {
737  return defineTransition(from, to, converter.from_bytes(utf8Symbol)[0]);
738 }
739 
741 
751  p->complete();
752  vector<vector<size_t>> tTable(p->transitions.size());
753  valarray<bool> accepting(false, p->transitions.size());
754  vector<char32_t> sortedAlphabet(p->alphabet.begin(), p->alphabet.end());
755  std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
756  vector<string> orderedStates;
757  orderedStates.reserve(p->transitions.size());
758  orderedStates.push_back(p->initial);
759  for (auto const& entry : p->transitions) {
760  if (entry.first != p->initial) {
761  orderedStates.push_back(entry.first);
762  }
763  }
764  for (size_t q(0); q < orderedStates.size(); q++) {
765  string const& qLabel(orderedStates[q]);
766  if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
767  accepting[q] = true;
768  }
769  for (size_t s(0); s < sortedAlphabet.size(); s++) {
770  tTable[q].push_back(index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
771  }
772  }
773  vector<valarray<bool>> equivalences(dfa::pImpl::indistinguishableStates(tTable, accepting));
774  for (size_t q(0); q < orderedStates.size(); q++) {
775  for (size_t first(0); first < equivalences[q].size(); first++) {
776  if (equivalences[q][first] && q > first) {
777  string const& superfluous(orderedStates[q]);
778  string const& remaining(orderedStates[first]);
779  p->acceptingStates.erase(superfluous);
780  p->transitions.erase(superfluous);
781  for (auto& from : p->transitions) {
782  for (auto& via : from.second) {
783  if (via.second == superfluous) {
784  via.second = remaining;
785  }
786  }
787  }
788  break;
789  }
790  }
791  }
792  return *this;
793 }
794 
796 
803  unordered_set<string> reachable(&(p->initial), &(p->initial)+1);
804  unordered_set<string> newReachable(reachable);
805  while (newReachable.size() > 0) {
806  unordered_set<string> oldReachable(move(newReachable));
807  newReachable.clear();
808  for (string const& reachableState : oldReachable) {
809  for (auto const& reachedState : p->transitions[reachableState]) {
810  if (reachable.insert(reachedState.second).second) {
811  newReachable.insert(reachedState.second);
812  }
813  }
814  }
815  }
816  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
817  if (reachable.find(it->first) == reachable.end()) {
818  p->acceptingStates.erase(it->first);
819  it = p->transitions.erase(it);
820  } else {
821  it++;
822  }
823  }
824  unordered_set<string> producing(p->acceptingStates);
825  unordered_set<string> newProducing(producing);
826  while (newProducing.size() > 0) {
827  unordered_set<string> oldProducing(move(newProducing));
828  newProducing.clear();
829  for (auto const& from : p->transitions) {
830  for (auto const& via : from.second) {
831  if (producing.find(via.second) != producing.end()) {
832  if (producing.insert(from.first).second) {
833  newProducing.insert(from.first);
834  }
835  break;
836  }
837  }
838  }
839  }
840  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
841  if (producing.find(it->first) == producing.end() && it->first != p->initial) {
842  p->acceptingStates.erase(it->first);
843  for (auto& via : p->transitions) {
844  for (auto itv = via.second.begin(); itv != via.second.end(); ) {
845  if (itv->second == it->first) {
846  itv = via.second.erase(itv);
847  } else {
848  itv++;
849  }
850  }
851  }
852  it = p->transitions.erase(it);
853  } else {
854  it++;
855  }
856  }
857  return *this;
858 }
859 
862  return purge().merge();
863 }
864 
866 
873  if (!p->transitions.empty()) {
874  auto const& oAlph = other.getAlphabet();
875  for (char32_t symbol : oAlph) {
876  addSymbol(symbol);
877  }
878  size_t trash = other.getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
879  p->complete();
880  vector<string> stateNames;
881  stateNames.reserve(p->transitions.size());
882  stateNames.push_back(p->initial);
883  for (auto const& fromVia : p->transitions) {
884  if (fromVia.first != p->initial) {
885  stateNames.push_back(fromVia.first);
886  }
887  }
888  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
889  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
890  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
891  for (size_t q = 0; q < stateNames.size(); q++) {
892  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
893  string potentialName = stateNames[q];
894  try {
895  potentialName.append(other.getLabelOf(qq));
896  } catch (std::out_of_range e) {
897  // We hit the trash state.
898  }
899  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
900  potentialName.append("_");
901  }
902  auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
903  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
904  try {
905  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.isAccepting(qq)) {
906  newAcc.insert(potentialName);
907  }
908  } catch (std::out_of_range e) {
909  // We hit the trash state AND q is not accepting.
910  }
911  }
912  }
913  for (size_t q = 0; q < stateNames.size(); q++) {
914  auto const& qLabel = stateNames[q];
915  auto const& viaTos = p->transitions[qLabel];
916  for (auto const& viaTo : viaTos) {
917  size_t p = index_of(stateNames, viaTo.second);
918  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
919  size_t pp;
920  try {
921  pp = other.delta(qq, viaTo.first);
922  } catch (std::logic_error e) {
923  pp = trash;
924  }
925  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
926  }
927  }
928  }
929  p->transitions = newTr;
930  p->acceptingStates = newAcc;
931  p->initial = *(pairToName[0][0]);
932  } else {
933  for (size_t q = 0; q < other.getNumberOfStates(); ++q) {
934  for (char32_t symbol : other.getAlphabet()) {
935  defineTransition(other.getLabelOf(q), other.getLabelOf(other.delta(q, symbol)), symbol);
936  }
937  setAccepting(other.getLabelOf(q), other.isAccepting(q));
938  }
939  }
940  return *this;
941 }
942 
944 
951  if (!p->transitions.empty()) {
952  vector<string> stateNames;
953  stateNames.reserve(p->transitions.size());
954  stateNames.push_back(p->initial);
955  for (auto const& fromTo : p->transitions) {
956  if (fromTo.first != p->initial) {
957  stateNames.push_back(fromTo.first);
958  }
959  }
960  auto const& oAlph = other.getAlphabet();
961  size_t commonSymbols = 0;
962  for (char32_t symbol : p->alphabet) {
963  if (index_of(oAlph, symbol) < oAlph.size()) {
964  commonSymbols++;
965  } else {
966  for (auto & fromVia : p->transitions) {
967  fromVia.second.erase(symbol);
968  }
969  }
970  }
971  p->alphabet.insert(oAlph.begin(), oAlph.end());
972  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
973  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
974  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
975  for (size_t q = 0; q < stateNames.size(); q++) {
976  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
977  string potentialName = stateNames[q] + other.getLabelOf(qq);
978  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
979  potentialName.append("_");
980  }
981  auto nameIt = newTr.emplace(potentialName, commonSymbols);
982  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
983  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.isAccepting(qq)) {
984  newAcc.insert(potentialName);
985  }
986  }
987  }
988  for (size_t q = 0; q < stateNames.size(); q++) {
989  auto const& qLabel = stateNames[q];
990  auto const& viaTos = p->transitions[qLabel];
991  for (auto const& viaTo : viaTos) {
992  size_t p = index_of(stateNames, viaTo.second);
993  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
994  size_t pp = other.delta(qq, viaTo.first);
995  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
996  }
997  }
998  }
999  p->transitions = newTr;
1000  p->acceptingStates = newAcc;
1001  p->initial = *(pairToName[0][0]);
1002  }
1003  return *this;
1004 }
1005 
1008  p->complete();
1009  for (auto const& fromVia : p->transitions) {
1010  if (!p->acceptingStates.erase(fromVia.first)) {
1011  p->acceptingStates.insert(fromVia.first);
1012  }
1013  }
1014  return *this;
1015 }
1016 
1019  if (!p->transitions.empty()) {
1020  vector<string> stateNames;
1021  stateNames.reserve(p->transitions.size());
1022  stateNames.push_back(p->initial);
1023  for (auto const& fromTo : p->transitions) {
1024  if (fromTo.first != p->initial) {
1025  stateNames.push_back(fromTo.first);
1026  }
1027  }
1028  Dtransitionmap newTr(p->transitions.size());
1029  unordered_set<string> newAcc(p->acceptingStates.size());
1030  for (size_t q = 0; q < stateNames.size(); q++) {
1031  auto const& vias = p->transitions[stateNames[q]];
1032  unordered_map<char32_t,string> newVias(vias.size());
1033  for (auto const& viaTo : vias) {
1034  newVias.insert(make_pair(viaTo.first, prefix + to_string(index_of(stateNames, viaTo.second))));
1035  }
1036  newTr.insert(make_pair(prefix + to_string(q), newVias));
1037  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
1038  newAcc.insert(prefix + to_string(q));
1039  }
1040  }
1041  p->initial = prefix + "0";
1042  p->transitions = newTr;
1043  p->acceptingStates = newAcc;
1044  }
1045  return *this;
1046 }
1047 
1049 
1053  vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
1054  std::sort(alphabetV.begin(), alphabetV.end());
1055  p->complete();
1056  vector<string> labelV;
1057  labelV.reserve(p->transitions.size());
1058  labelV.push_back(p->initial);
1059  for (auto const& tr : p->transitions) {
1060  if (tr.first == p->initial) {
1061  continue;
1062  }
1063  labelV.push_back(tr.first);
1064  }
1065  valarray<bool> acceptingV(false, labelV.size());
1066  vector<vector<size_t>> transitionV(labelV.size(), vector<size_t>(alphabetV.size()));
1067  for (size_t q(0); q < labelV.size(); q++) {
1068  string const& qLabel = labelV[q];
1069  acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
1070  for (size_t s(0); s < alphabetV.size(); s++) {
1071  transitionV[q][s] = index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
1072  }
1073  }
1074  return {alphabetV, transitionV, labelV, acceptingV};
1075 }
1076 
1077 } // namespace reg::dfa
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
Definition: dfa.cpp:497
std::string const & getInitialState() const
Names this DFA's initial state.
Definition: dfa.cpp:347
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
Definition: dfa.cpp:691
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:391
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA&#39;s language.
Definition: dfa.cpp:872
nfa build()
Builds the NFA, as defined by previous operations.
Definition: nfa.cpp:1003
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
Definition: dfa.cpp:509
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
Definition: dfa.cpp:528
vector< char32_t > alphabet
Represents the set of processable symbols.
Definition: dfa.cpp:57
dfa()
Constructs a DFA accepting the empty language ∅.
Definition: dfa.cpp:127
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:23
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: dfa.cpp:56
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:445
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:58
bool operator==(dfa const &d) const
Tests whether this DFA accepts exactly the same language as another one.
Definition: dfa.cpp:177
std::string const & getLabelOf(size_t q) const
Definition: nfa.cpp:335
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Definition: dfa.cpp:457
Represents deterministic finite automata.
Definition: dfa.h:21
Private implementation details of DFAs.
Definition: dfa.cpp:54
Constructs NFAs step by step.
Definition: nfa.h:91
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: utils.cpp:4
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
Definition: dfa.cpp:861
vector< string > labels
Stores the names of states.
Definition: dfa.cpp:59
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
Definition: dfa.cpp:60
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
Definition: dfa.cpp:802
friend std::string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:445
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
Definition: dfa.cpp:507
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
Definition: dfa.cpp:239
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:363
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
Definition: dfa.cpp:66
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
Definition: dfa.cpp:84
T min(T... args)
bool operator!=(dfa const &d) const
Tests whether this DFA doesn't accept the same language as another one.
Definition: dfa.cpp:221
size_t getNumberOfSymbols() const
Definition: dfa.cpp:381
Private implementation details of DFA builders.
Definition: dfa.cpp:505
size_t index_of(C const &container, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: utils.h:19
Contains the reg::nfa class definition.
size_t deltaHat(size_t qi, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: dfa.cpp:299
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Definition: dfa.cpp:481
friend std::u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:418
T hash(T... args)
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:473
builder & complement()
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: dfa.cpp:1007
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
Definition: dfa.cpp:557
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
Definition: dfa.cpp:355
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
Definition: dfa.cpp:709
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Definition: dfa.cpp:750
Constructs DFAs step by step.
Definition: dfa.h:67
size_t getNumberOfStates() const
Definition: dfa.cpp:376
T operator()(T... args)
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA&#39;s state names to consecutive numbers, prefixed with a given string...
Definition: dfa.cpp:1018
string initial
Name of the prospective DFA&#39;s initial state.
Definition: dfa.cpp:506
builder()
Constructs a blank builder object.
Definition: dfa.cpp:583
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
Definition: dfa.cpp:469
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: nfa.cpp:168
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
Definition: dfa.cpp:543
Where this library lives.
Definition: dfa.cpp:51
unordered_set< string > acceptingStates
Set of names of the prospective DFA&#39;s accept states.
Definition: dfa.cpp:508
std::u32string getShortestWord() const
Definition: dfa.cpp:329
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
Definition: dfa.cpp:950
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
Definition: dfa.cpp:646
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:445
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
Definition: dfa.cpp:63
std::string getShortestUtf8Word() const
Definition: dfa.cpp:334
pImpl()=default
Constructs empty private implementation object.
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:418
T operator()(T... args)
dfa build()
Builds the DFA, as defined by previous operations, including completion.
Definition: dfa.cpp:1052
std::string const & getLabelOf(size_t qi) const
Definition: dfa.cpp:339
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
Definition: nfa.cpp:263
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:371
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
Definition: dfa.cpp:725
std::shared_ptr< nfa const > equivalent
Holds an equivalent NFA in case it is ever needed.
Definition: dfa.cpp:55
Contains the reg::dfa class definition.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
Definition: dfa.cpp:517
unordered_map< string, unordered_map< char32_t, string > > Dtransitionmap
Shorthand for the map from state name and transition symbol to target state.
Definition: dfa.cpp:502
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
Definition: dfa.cpp:675
dfa & operator=(dfa const &d)
Copy-assigns this DFA by copying another one's private implementation object.
Definition: dfa.cpp:144