reglibcpp  1.6.0
(Naïve) C++ implementation of models for regular languages
dfa.cpp
Go to the documentation of this file.
1 #include "dfa.h"
3 using std::valarray;
4 using std::vector;
5 using std::string;
6 using std::to_string;
7 using std::make_unique;
8 using std::u32string;
9 
10 using std::move;
11 
12 #include <unordered_set>
13 using std::unordered_set;
14 
15 #include <unordered_map>
16 using std::unordered_map;
17 
18 #include <forward_list>
19 using std::forward_list;
20 
21 #include <algorithm>
22 using std::find;
23 using std::none_of;
24 
25 #include <cstdint>
26 
27 #include "nfa.h"
28 #include "expression.h"
29 
30 namespace std {
31  template<>
32  struct hash<valarray<bool>> {
33  size_t operator()(valarray<bool> const& value) const {
34  size_t hash = 0;
35  for (bool v : value) {
36  hash <<= 1;
37  hash |= v;
38  }
39  return hash;
40  }
41  };
42 
43  template<>
44  struct equal_to<valarray<bool>> {
45  bool operator()(valarray<bool> const& lhs, valarray<bool> const& rhs) const {
46  return (lhs.size() == rhs.size() && (lhs == rhs).min() == true);
47  }
48  };
49 }
50 
51 namespace reg {
52 
54 struct dfa::pImpl {
60 
62  pImpl() : accepting(false, 1), alphabet(0), utf8Alphabet(0), labels(1), transitions(1) {}
63 
67  accepting(move(accepting)),
68  alphabet(move(alphabet)),
69  labels(move(labels)),
70  transitions(move(transitions)) {
71  utf8Alphabet.reserve(this->alphabet.size());
72  for (char32_t symbol : this->alphabet) {
73  utf8Alphabet.push_back(converter.to_bytes(symbol));
74  }
75  }
76 
78 
85  vector<valarray<bool>> distinguishable;
86  distinguishable.reserve(transitions.size());
87  for (size_t q(0); q < transitions.size(); q++) {
88  bool qAcc(accepting[q]);
89  distinguishable.push_back(valarray<bool>(false, transitions.size()));
90  for (size_t p(0); p < q; p++) {
91  bool pAcc(accepting[p]);
92  distinguishable[q][p] = (qAcc != pAcc);
93  distinguishable[p][q] = (qAcc != pAcc);
94  }
95  }
96  bool changes(true);
97  while (changes) {
98  changes = false;
99  for (size_t q(0); q < transitions.size(); q++) {
100  for (size_t p(0); p < q; p++) {
101  if (distinguishable[q][p]) {
102  continue;
103  }
104  for (size_t s(0); s < transitions[q].size(); s++) {
105  size_t qS(transitions[q][s]);
106  size_t pS(transitions[p][s]);
107  if (distinguishable[qS][pS]) {
108  changes = true;
109  distinguishable[q][p] = true;
110  distinguishable[p][q] = true;
111  break;
112  }
113  }
114  }
115  }
116  }
117  valarray<bool> allTrue(true, transitions.size());
118  for (size_t i(0); i < distinguishable.size(); i++) {
119  distinguishable[i] ^= allTrue;
120  }
121  return distinguishable;
122  }
123 };
124 
126 dfa::dfa() : p(new pImpl) {}
127 
129 dfa::dfa(vector<char32_t>& alphabet,
130  vector<vector<size_t>>& transitions,
131  vector<string>& labels,
132  valarray<bool>& acceptingStates) :
133  p(make_unique<pImpl>(alphabet, transitions, labels, acceptingStates)) {}
134 
136 dfa::dfa(dfa const& d) : p(new pImpl(*(d.p))) {}
137 
139 
140 dfa::dfa(dfa&& d) : p(new pImpl) {p.swap(d.p);}
141 
143 dfa::dfa(nfa const& n) : dfa(builder(n).build()) {}
144 
146 dfa::dfa(dfa::builder& b) : dfa(b.build()) {}
147 
149 dfa& dfa::operator=(const dfa& d) {
150  if (this != &d) {
151  p.reset(new pImpl(*(d.p)));
152  }
153  return *this;
154 }
155 
157 
159  if (this != &d) {
160  p.reset(new pImpl);
161  p.swap(d.p);
162  }
163  return *this;
164 }
165 
166 dfa::~dfa() = default;
167 
169 
173 bool dfa::operator==(dfa const& d) const {
174  forward_list<char32_t> unitedAlphabet(p->alphabet.begin(), p->alphabet.end());
175  size_t unitedAlphabetSize(p->alphabet.size());
176  for (char32_t symbol : d.getAlphabet()) {
177  if (find(p->alphabet.begin(), p->alphabet.end(), symbol) == p->alphabet.end()) {
178  unitedAlphabet.push_front(symbol);
179  unitedAlphabetSize++;
180  }
181  }
182  vector<vector<size_t>> tTable(getNumberOfStates()+d.getNumberOfStates()+1, vector<size_t>(unitedAlphabetSize));
183  valarray<bool> accepting(false, getNumberOfStates()+d.getNumberOfStates()+1);
184  for (size_t q(0); q < getNumberOfStates(); q++) {
185  size_t s(0);
186  vector<size_t>& row = tTable[q];
187  for (char32_t symbol : unitedAlphabet) {
188  try {
189  row[s] = delta(q, symbol);
190  } catch (std::domain_error e) {
191  row[s] = getNumberOfStates()+d.getNumberOfStates();
192  }
193  s++;
194  }
195  accepting[q] = isAccepting(q);
196  }
197  for (size_t q(0); q < d.getNumberOfStates(); q++) {
198  size_t s(0);
199  vector<size_t>& row = tTable[q+getNumberOfStates()];
200  for (char32_t symbol : unitedAlphabet) {
201  try {
202  row[s] = getNumberOfStates()+d.delta(q, symbol);
203  } catch (std::domain_error e) {
204  row[s] = getNumberOfStates()+d.getNumberOfStates();
205  }
206  s++;
207  }
208  accepting[q+getNumberOfStates()] = d.isAccepting(q);
209  }
210  for (size_t s(0); s < unitedAlphabetSize; s++) {
212  }
213  return pImpl::indistinguishableStates(tTable, accepting)[0][getNumberOfStates()];
214 }
215 
217 bool dfa::operator!=(dfa const& d) const {return !operator==(d);}
218 
220 
227 size_t dfa::delta(size_t qi, size_t si) const {
228  if (si >= p->alphabet.size()) {
229  throw std::domain_error("There is no symbol with index " + to_string(si));
230  }
231  if (qi >= p->labels.size()) {
232  throw std::out_of_range("There is no state with index " + to_string(qi));
233  }
234  return p->transitions[qi][si];
235 }
236 
238 
245 size_t dfa::delta(size_t qi, char32_t symbol) const {
246  try {
247  return delta(qi, index_of(p->alphabet, symbol));
248  } catch (std::domain_error e) {
249  throw std::domain_error(u8"δ is not defined for symbol " + converter.to_bytes(symbol));
250  }
251 }
252 
254 size_t dfa::delta(size_t qi, string const& utf8Symbol) const {
255  return delta(qi, converter.from_bytes(utf8Symbol)[0]);
256 }
257 
259 
266 string const& dfa::delta(string const& q, char32_t symbol) const {
267  try {
268  return getStates()[delta(index_of(getStates(), q), symbol)];
269  } catch (std::out_of_range e) {
270  throw std::out_of_range("There is no state named " + q);
271  }
272 }
273 
275 string const& dfa::delta(string const& q, string const& utf8Symbol) const {
276  return delta(q, converter.from_bytes(utf8Symbol)[0]);
277 }
278 
280 
287 size_t dfa::deltaHat(size_t qi, u32string const& word) const {
288  for (char32_t symbol : word) {
289  qi = delta(qi, symbol);
290  }
291  return qi;
292 }
293 
295 size_t dfa::deltaHat(size_t q, string const& utf8Word) const {
296  return deltaHat(q, converter.from_bytes(utf8Word));
297 }
298 
300 
307 string const& dfa::deltaHat(string const& q, u32string const& word) const {
308  return p->labels[deltaHat(index_of(getStates(), q), word)];
309 }
310 
312 string const& dfa::deltaHat(string const& q, string const& utf8Word) const {
313  return deltaHat(q, converter.from_bytes(utf8Word));
314 }
315 
318  return findShortestWord(*this);
319 }
320 
322 string dfa::getShortestUtf8Word() const {
323  return findShortestUtf8Word(*this);
324 }
325 
327 string const& dfa::getLabelOf(size_t q) const {
328  return getStates().at(q);
329 }
330 
332 
335 string const& dfa::getInitialState() const {
336  return getStates()[0];
337 }
338 
340 
344  return p->labels;
345 }
346 
348 
352  return p->alphabet;
353 }
354 
356 
360  return p->utf8Alphabet;
361 }
362 
364 size_t dfa::getNumberOfStates() const {
365  return getStates().size();
366 }
367 
369 size_t dfa::getNumberOfSymbols() const {
370  return getAlphabet().size();
371 }
372 
374 
379 bool dfa::isAccepting(size_t qi) const {
380  if (qi >= p->labels.size()) {
381  throw std::out_of_range("There is no state with index " + to_string(qi));
382  }
383  return p->accepting[qi];
384 }
385 
387 
392 bool dfa::isAccepting(string const& q) const {
393  try {
394  return isAccepting(index_of(getStates(), q));
395  } catch (std::out_of_range e) {
396  throw std::out_of_range("There is no state named " + q);
397  }
398 }
399 
401 
407  auto const& p = d.p;
408  if (p->accepting[0]) {
409  return U"";
410  }
411  unordered_map<size_t, u32string> shortestWords(p->labels.size());
412  size_t oldSize = 0;
413  shortestWords.emplace(0, U"");
414  while (shortestWords.size() > oldSize) {
415  oldSize = shortestWords.size();
416  for (auto const& stateWord : shortestWords) {
417  for (auto symbol : p->alphabet) {
418  size_t reached = d.delta(stateWord.first, symbol);
419  u32string shWord = stateWord.second + symbol;
420  if (p->accepting[reached]) {
421  return shWord;
422  }
423  if (shortestWords.find(reached) == shortestWords.end()) {
424  shortestWords.emplace(reached, shWord);
425  }
426  }
427  }
428  }
429  throw std::logic_error("This DFA doesn't accept any words!");
430 }
431 
433 string findShortestUtf8Word(dfa const& d) {
434  return converter.to_bytes(findShortestWord(d));
435 }
436 
438 
445 dfa::builder dfa::unite(dfa const& d1, dfa const& d2) {
446  return builder(d1).unite(d2);
447 }
448 
450 
457 dfa::builder dfa::intersect(dfa const& d1, dfa const& d2) {
458  return builder(d1).intersect(d2);
459 }
460 
462 
469 dfa::builder dfa::subtract(dfa const& d1, dfa const& d2) {
470  builder b1(d1);
471  for (auto symbol : d2.getAlphabet()) {
472  b1.addSymbol(symbol);
473  }
474  return b1.complement().unite(d2).complement();
475 }
476 
478 
486  return dfa::builder(d).complement();
487 }
488 
491 
494  string initial;
499 
501  pImpl() = default;
503 
506  string& initial,
510  ) : initial(move(initial)),
511  alphabet(move(alphabet)),
513  transitions(move(transitions)) {}
514 
516  bool isTrashState(string const& q) const {
517  if (acceptingStates.count(q) != 0) {
518  return false;
519  }
520  auto const& from = transitions.at(q);
521  for (char32_t symbol : alphabet) {
522  auto via = from.find(symbol);
523  if (via != from.end() && (*via).second != q) {
524  return false;
525  }
526  }
527  return true;
528  }
529 
531  string const& generateNewState() {
532  size_t q(0);
533  string newState;
534  while (transitions.find(newState = "q" + to_string(q)) != transitions.end()) {
535  q++;
536  }
537  if (transitions.empty()) {
538  initial = newState;
539  }
540  return transitions.insert(std::make_pair(newState, unordered_map<char32_t, string>())).first->first;
541  }
542 
544 
545  void complete() {
546  bool trashUsed(false);
547  string const& trashState = [&]{
548  for (auto const& entry : this->transitions) {
549  if (this->isTrashState(entry.first)) {
550  trashUsed = true;
551  return entry.first;
552  }
553  }
554  return this->generateNewState();
555  }();
556  for (auto& from : transitions) {
557  for (char32_t symbol : alphabet) {
558  if (from.second.find(symbol) == from.second.end()) {
559  from.second[symbol] = trashState;
560  trashUsed |= (from.first != trashState);
561  }
562  }
563  }
564  if (!trashUsed) {
565  transitions.erase(trashState);
566  }
567  }
568 };
569 
572 
575  auto initial = dfa.getLabelOf(0);
576  auto alphabet = unordered_set<char32_t>(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
578  auto transitions = Dtransitionmap(dfa.getNumberOfStates());
579  p = make_unique<pImpl>(initial, alphabet, labels, transitions);
580  for (size_t q = 0; q < dfa.getNumberOfStates(); ++q) {
581  for (char32_t symbol : dfa.getAlphabet()) {
582  defineTransition(dfa.getLabelOf(q), dfa.getLabelOf(dfa.delta(q, symbol)), symbol);
583  }
584  setAccepting(dfa.getLabelOf(q), dfa.isAccepting(q));
585  }
586 }
587 
592  size_t stackSize(1);
593  auto initial = nfa.getLabelOf(stack.front());
594  auto alphabet = nfa.getAlphabet();
595  alphabet.erase(alphabet.begin());
596  unordered_set<char32_t> alphabetS(alphabet.begin(), alphabet.end());
597  unordered_set<string> acceptingStates;
598  Dtransitionmap transitions;
599  transitions[initial];
600  p = make_unique<pImpl>(initial, alphabetS, acceptingStates, transitions);
601  while (stackSize != 0) {
602  valarray<bool> current(move(stack.front()));
603  stack.pop_front();
604  stackSize--;
605  for (char32_t symbol : p->alphabet) {
606  valarray<bool> next(nfa.deltaHat(current, u32string(1, symbol)));
607  defineTransition(nfa.getLabelOf(current), nfa.getLabelOf(next), symbol);
608  auto equalToNext = [&](valarray<bool> const& ref)->bool{return (next == ref).min() == true;};
609  if (!equalToNext(current) &&
610  none_of(stack.begin(), stack.end(), equalToNext) &&
611  none_of(done.begin(), done.end(), equalToNext)) {
612  stack.push_front(next);
613  stackSize++;
614  }
615  }
616  for (size_t qi(0); qi < current.size(); qi++) {
617  if (current[qi] && nfa.isAccepting(qi)) {
618  setAccepting(nfa.getLabelOf(current), true);
619  break;
620  }
621  }
622  done.insert(current);
623  }
624 }
625 
627 dfa::builder::builder(builder const& b) : p(new pImpl(*(b.p))) {}
628 
630 
631 dfa::builder::builder(builder&& b) : p(new pImpl) {p.swap(b.p);}
632 
635  if (this != &b) {
636  p.reset(new pImpl(*(b.p)));
637  }
638  return *this;
639 }
640 
642 
644  if (this != &b) {
645  p.reset(new pImpl);
646  p.swap(b.p);
647  }
648  return *this;
649 }
650 
651 dfa::builder::~builder() = default;
652 
654 
659  p->alphabet.insert(symbol);
660  return *this;
661 }
662 
664 dfa::builder& dfa::builder::addSymbol(string const& utf8Symbol) {
665  return addSymbol(converter.from_bytes(utf8Symbol)[0]);
666 }
667 
669 
674 dfa::builder& dfa::builder::setAccepting(string const& state, bool accept) {
675  if (p->transitions.empty()) {
676  p->initial = state;
677  }
678  p->transitions[state];
679  if (accept) {
680  p->acceptingStates.insert(state);
681  } else {
682  p->acceptingStates.erase(state);
683  }
684  return *this;
685 }
686 
688 
693  p->initial = state;
694  p->transitions[state];
695  return *this;
696 }
697 
699 
708 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, char32_t symbol) {
709  if (p->transitions.empty()) {
710  p->initial = from;
711  }
712  p->transitions[to];
713  p->transitions[from][symbol] = to;
714  addSymbol(symbol);
715  return *this;
716 }
717 
719 dfa::builder& dfa::builder::defineTransition(string const& from, string const& to, string const& utf8Symbol) {
720  return defineTransition(from, to, converter.from_bytes(utf8Symbol)[0]);
721 }
722 
724 
734  p->complete();
735  vector<vector<size_t>> tTable(p->transitions.size());
736  valarray<bool> accepting(false, p->transitions.size());
737  vector<char32_t> sortedAlphabet(p->alphabet.begin(), p->alphabet.end());
738  std::sort(sortedAlphabet.begin(), sortedAlphabet.end());
739  vector<string> orderedStates;
740  orderedStates.reserve(p->transitions.size());
741  orderedStates.push_back(p->initial);
742  for (auto const& entry : p->transitions) {
743  if (entry.first != p->initial) {
744  orderedStates.push_back(entry.first);
745  }
746  }
747  for (size_t q(0); q < orderedStates.size(); q++) {
748  string const& qLabel(orderedStates[q]);
749  if (p->acceptingStates.find(qLabel) != p->acceptingStates.end()) {
750  accepting[q] = true;
751  }
752  for (size_t s(0); s < sortedAlphabet.size(); s++) {
753  tTable[q].push_back(index_of(orderedStates, p->transitions[qLabel][sortedAlphabet[s]]));
754  }
755  }
756  vector<valarray<bool>> equivalences(dfa::pImpl::indistinguishableStates(tTable, accepting));
757  for (size_t q(0); q < orderedStates.size(); q++) {
758  for (size_t first(0); first < equivalences[q].size(); first++) {
759  if (equivalences[q][first] && q > first) {
760  string const& superfluous(orderedStates[q]);
761  string const& remaining(orderedStates[first]);
762  p->acceptingStates.erase(superfluous);
763  p->transitions.erase(superfluous);
764  for (auto& from : p->transitions) {
765  for (auto& via : from.second) {
766  if (via.second == superfluous) {
767  via.second = remaining;
768  }
769  }
770  }
771  break;
772  }
773  }
774  }
775  return *this;
776 }
777 
779 
786  unordered_set<string> reachable(&(p->initial), &(p->initial)+1);
787  unordered_set<string> newReachable(reachable);
788  while (newReachable.size() > 0) {
789  unordered_set<string> oldReachable(move(newReachable));
790  newReachable.clear();
791  for (string const& reachableState : oldReachable) {
792  for (auto const& reachedState : p->transitions[reachableState]) {
793  if (reachable.insert(reachedState.second).second) {
794  newReachable.insert(reachedState.second);
795  }
796  }
797  }
798  }
799  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
800  if (reachable.find(it->first) == reachable.end()) {
801  p->acceptingStates.erase(it->first);
802  it = p->transitions.erase(it);
803  } else {
804  it++;
805  }
806  }
807  unordered_set<string> producing(p->acceptingStates);
808  unordered_set<string> newProducing(producing);
809  while (newProducing.size() > 0) {
810  unordered_set<string> oldProducing(move(newProducing));
811  newProducing.clear();
812  for (auto const& from : p->transitions) {
813  for (auto const& via : from.second) {
814  if (producing.find(via.second) != producing.end()) {
815  if (producing.insert(from.first).second) {
816  newProducing.insert(from.first);
817  }
818  break;
819  }
820  }
821  }
822  }
823  for (auto it = p->transitions.begin(); it != p->transitions.end(); ) {
824  if (producing.find(it->first) == producing.end() && it->first != p->initial) {
825  p->acceptingStates.erase(it->first);
826  for (auto& via : p->transitions) {
827  for (auto itv = via.second.begin(); itv != via.second.end(); ) {
828  if (itv->second == it->first) {
829  itv = via.second.erase(itv);
830  } else {
831  itv++;
832  }
833  }
834  }
835  it = p->transitions.erase(it);
836  } else {
837  it++;
838  }
839  }
840  return *this;
841 }
842 
845  return purge().merge();
846 }
847 
849 
856  if (!p->transitions.empty()) {
857  auto const& oAlph = other.getAlphabet();
858  for (char32_t symbol : oAlph) {
859  addSymbol(symbol);
860  }
861  size_t trash = other.getNumberOfStates() * !!(p->alphabet.size() - oAlph.size());
862  p->complete();
863  vector<string> stateNames;
864  stateNames.reserve(p->transitions.size());
865  stateNames.push_back(p->initial);
866  for (auto const& fromVia : p->transitions) {
867  if (fromVia.first != p->initial) {
868  stateNames.push_back(fromVia.first);
869  }
870  }
871  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
872  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
873  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
874  for (size_t q = 0; q < stateNames.size(); q++) {
875  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
876  string potentialName = stateNames[q];
877  try {
878  potentialName.append(other.getLabelOf(qq));
879  } catch (std::out_of_range e) {
880  // We hit the trash state.
881  }
882  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
883  potentialName.append("_");
884  }
885  auto nameIt = newTr.emplace(potentialName, p->alphabet.size());
886  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
887  try {
888  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() || other.isAccepting(qq)) {
889  newAcc.insert(potentialName);
890  }
891  } catch (std::out_of_range e) {
892  // We hit the trash state AND q is not accepting.
893  }
894  }
895  }
896  for (size_t q = 0; q < stateNames.size(); q++) {
897  auto const& qLabel = stateNames[q];
898  auto const& viaTos = p->transitions[qLabel];
899  for (auto const& viaTo : viaTos) {
900  size_t p = index_of(stateNames, viaTo.second);
901  for (size_t qq = 0; qq < other.getNumberOfStates() + !!trash; qq++) {
902  size_t pp;
903  try {
904  pp = other.delta(qq, viaTo.first);
905  } catch (std::logic_error e) {
906  pp = trash;
907  }
908  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
909  }
910  }
911  }
912  p->transitions = newTr;
913  p->acceptingStates = newAcc;
914  p->initial = *(pairToName[0][0]);
915  } else {
916  for (size_t q = 0; q < other.getNumberOfStates(); ++q) {
917  for (char32_t symbol : other.getAlphabet()) {
918  defineTransition(other.getLabelOf(q), other.getLabelOf(other.delta(q, symbol)), symbol);
919  }
920  setAccepting(other.getLabelOf(q), other.isAccepting(q));
921  }
922  }
923  return *this;
924 }
925 
927 
934  if (!p->transitions.empty()) {
935  vector<string> stateNames;
936  stateNames.reserve(p->transitions.size());
937  stateNames.push_back(p->initial);
938  for (auto const& fromTo : p->transitions) {
939  if (fromTo.first != p->initial) {
940  stateNames.push_back(fromTo.first);
941  }
942  }
943  auto const& oAlph = other.getAlphabet();
944  size_t commonSymbols = 0;
945  for (char32_t symbol : p->alphabet) {
946  if (index_of(oAlph, symbol) < oAlph.size()) {
947  commonSymbols++;
948  } else {
949  for (auto & fromVia : p->transitions) {
950  fromVia.second.erase(symbol);
951  }
952  }
953  }
954  p->alphabet.insert(oAlph.begin(), oAlph.end());
955  Dtransitionmap newTr(stateNames.size() * other.getNumberOfStates());
956  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
957  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
958  for (size_t q = 0; q < stateNames.size(); q++) {
959  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
960  string potentialName = stateNames[q] + other.getLabelOf(qq);
961  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
962  potentialName.append("_");
963  }
964  auto nameIt = newTr.emplace(potentialName, commonSymbols);
965  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
966  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.isAccepting(qq)) {
967  newAcc.insert(potentialName);
968  }
969  }
970  }
971  for (size_t q = 0; q < stateNames.size(); q++) {
972  auto const& qLabel = stateNames[q];
973  auto const& viaTos = p->transitions[qLabel];
974  for (auto const& viaTo : viaTos) {
975  size_t p = index_of(stateNames, viaTo.second);
976  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
977  size_t pp = other.delta(qq, viaTo.first);
978  newTr[*(pairToName[q][qq])][viaTo.first] = *(pairToName[p][pp]);
979  }
980  }
981  }
982  p->transitions = newTr;
983  p->acceptingStates = newAcc;
984  p->initial = *(pairToName[0][0]);
985  }
986  return *this;
987 }
988 
991  p->complete();
992  for (auto const& fromVia : p->transitions) {
993  if (!p->acceptingStates.erase(fromVia.first)) {
994  p->acceptingStates.insert(fromVia.first);
995  }
996  }
997  return *this;
998 }
999 
1002  if (!p->transitions.empty()) {
1003  vector<string> stateNames;
1004  stateNames.reserve(p->transitions.size());
1005  stateNames.push_back(p->initial);
1006  for (auto const& fromTo : p->transitions) {
1007  if (fromTo.first != p->initial) {
1008  stateNames.push_back(fromTo.first);
1009  }
1010  }
1011  Dtransitionmap newTr(p->transitions.size());
1012  unordered_set<string> newAcc(p->acceptingStates.size());
1013  for (size_t q = 0; q < stateNames.size(); q++) {
1014  auto const& vias = p->transitions[stateNames[q]];
1015  unordered_map<char32_t,string> newVias(vias.size());
1016  for (auto const& viaTo : vias) {
1017  newVias.insert(make_pair(viaTo.first, prefix + to_string(index_of(stateNames, viaTo.second))));
1018  }
1019  newTr.insert(make_pair(prefix + to_string(q), newVias));
1020  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
1021  newAcc.insert(prefix + to_string(q));
1022  }
1023  }
1024  p->initial = prefix + "0";
1025  p->transitions = newTr;
1026  p->acceptingStates = newAcc;
1027  }
1028  return *this;
1029 }
1030 
1032 
1036  vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
1037  std::sort(alphabetV.begin(), alphabetV.end());
1038  p->complete();
1039  vector<string> labelV;
1040  labelV.reserve(p->transitions.size());
1041  labelV.push_back(p->initial);
1042  for (auto const& tr : p->transitions) {
1043  if (tr.first == p->initial) {
1044  continue;
1045  }
1046  labelV.push_back(tr.first);
1047  }
1048  valarray<bool> acceptingV(false, labelV.size());
1049  vector<vector<size_t>> transitionV(labelV.size(), vector<size_t>(alphabetV.size()));
1050  for (size_t q(0); q < labelV.size(); q++) {
1051  string const& qLabel = labelV[q];
1052  acceptingV[q] = (p->acceptingStates.find(qLabel) != p->acceptingStates.end());
1053  for (size_t s(0); s < alphabetV.size(); s++) {
1054  transitionV[q][s] = index_of(labelV, p->transitions[qLabel][alphabetV[s]]);
1055  }
1056  }
1057  return {alphabetV, transitionV, labelV, acceptingV};
1058 }
1059 
1061 } // namespace reg::dfa
bool operator!=(const dfa &d) const
Tests whether this DFA doesn't accept the same language as another one.
Definition: dfa.cpp:217
static dfa::builder complement(dfa const &d)
Creates a builder for a DFA accepting the complement of the language of a DFA.
Definition: dfa.cpp:485
std::string const & getInitialState() const
Names this DFA's initial state.
Definition: dfa.cpp:335
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective DFA.
Definition: dfa.cpp:674
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:379
builder & unite(dfa const &other)
Makes the prospective DFA also accept every word of another DFA&#39;s language.
Definition: dfa.cpp:855
Dtransitionmap transitions
Transition function (state × symbol → state) of the prospective DFA.
Definition: dfa.cpp:497
bool isTrashState(string const &q) const
Tests whether all of a state's outgoing transitions point to itself.
Definition: dfa.cpp:516
vector< char32_t > alphabet
Represents the set of processable symbols.
Definition: dfa.cpp:56
dfa()
Constructs a DFA accepting the empty language ∅.
Definition: dfa.cpp:126
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:25
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: dfa.cpp:55
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:441
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:57
std::string const & getLabelOf(size_t q) const
Definition: nfa.cpp:331
static dfa::builder unite(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the union of the languages of two DFAs.
Definition: dfa.cpp:445
Represents deterministic finite automata.
Definition: dfa.h:27
Private implementation details of DFAs.
Definition: dfa.cpp:54
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: dfa.cpp:1060
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
Definition: dfa.cpp:844
vector< string > labels
Stores the names of states.
Definition: dfa.cpp:58
vector< vector< size_t > > transitions
Stores the transition function as a table viz state index × symbol index → state index...
Definition: dfa.cpp:59
builder & purge()
Purges the prospective DFA of unreachable and non-producing states, allowing for minimization.
Definition: dfa.cpp:785
friend std::string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:433
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective DFA.
Definition: dfa.cpp:495
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
Definition: dfa.cpp:227
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:351
pImpl(vector< char32_t > &alphabet, vector< vector< size_t >> &transitions, vector< string > &labels, valarray< bool > &accepting)
Constructs private implementation object with provided members.
Definition: dfa.cpp:65
static vector< valarray< bool > > indistinguishableStates(vector< vector< size_t >> const &transitions, valarray< bool > const &accepting)
Builds the table of indistinguishable states w.r.t. a transition function.
Definition: dfa.cpp:83
T min(T... args)
dfa & operator=(const dfa &d)
Copy-assigns this DFA by copying another one's private implementation object.
Definition: dfa.cpp:149
size_t getNumberOfSymbols() const
Definition: dfa.cpp:369
Private implementation details of DFA builders.
Definition: dfa.cpp:493
size_t index_of(C const &container, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: dfa.h:120
Contains the reg::nfa class definition.
size_t deltaHat(size_t qi, std::u32string const &word) const
Computes this DFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: dfa.cpp:287
static dfa::builder subtract(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the set difference of the languages of two DFAs.
Definition: dfa.cpp:469
Contains the reg::expression class defintion.
friend std::u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:406
T hash(T... args)
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:469
bool operator==(const dfa &d) const
Tests whether this DFA accepts exactly the same language as another one.
Definition: dfa.cpp:173
builder & complement()
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: dfa.cpp:990
void complete()
Totalizes a partial transition function by pointing any undefined transitions towards a trash state...
Definition: dfa.cpp:545
std::vector< std::string > const & getStates() const
Fetches this DFA's set of states.
Definition: dfa.cpp:343
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective DFA.
Definition: dfa.cpp:692
builder & merge()
Merges the prospective DFA's indistinguishable states, allowing for minimization. ...
Definition: dfa.cpp:733
Constructs DFAs step by step.
Definition: dfa.h:72
size_t getNumberOfStates() const
Definition: dfa.cpp:364
T operator()(T... args)
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA&#39;s state names to consecutive numbers, prefixed with a given string...
Definition: dfa.cpp:1001
string initial
Name of the prospective DFA&#39;s initial state.
Definition: dfa.cpp:494
builder()
Constructs a blank builder object.
Definition: dfa.cpp:571
static dfa::builder intersect(dfa const &d1, dfa const &d2)
Creates a builder for a DFA accepting the intersection of the languages of two DFAs.
Definition: dfa.cpp:457
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: nfa.cpp:164
string const & generateNewState()
Generates a uniquely named new state and adds it to the set of states.
Definition: dfa.cpp:531
Where this library lives.
Definition: dfa.cpp:51
unordered_set< string > acceptingStates
Set of names of the prospective DFA&#39;s accept states.
Definition: dfa.cpp:496
std::u32string getShortestWord() const
Definition: dfa.cpp:317
builder & intersect(dfa const &other)
Makes the prospective DFA accept only words accepted also by another DFA.
Definition: dfa.cpp:933
builder & operator=(const builder &b)
Copy-assigns a builder by copying another one's private implementation object.
Definition: dfa.cpp:634
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:433
pImpl()
Constructs private implementation object for a DFA accepting the empty language ∅.
Definition: dfa.cpp:62
std::string getShortestUtf8Word() const
Definition: dfa.cpp:322
pImpl()=default
Constructs empty private implementation object.
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:406
T operator()(T... args)
dfa build()
Builds the DFA, as defined by previous operations, including completion.
Definition: dfa.cpp:1035
std::string const & getLabelOf(size_t qi) const
Definition: dfa.cpp:327
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
Definition: nfa.cpp:259
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this DFA's set of processable symbols as UTF-8-encoded strings.
Definition: dfa.cpp:359
builder & defineTransition(std::string const &from, std::string const &to, char32_t symbol)
(Re-)Defines a transition for the prospective DFA.
Definition: dfa.cpp:708
Contains the reg::dfa class definition.
pImpl(string &initial, unordered_set< char32_t > &alphabet, unordered_set< string > &acceptingStates, Dtransitionmap &transitions)
Constructs private implementation object with provided members.
Definition: dfa.cpp:505
unordered_map< string, unordered_map< char32_t, string > > Dtransitionmap
Shorthand for the map from state name and transition symbol to target state.
Definition: dfa.cpp:490
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
Definition: dfa.cpp:658