reglibcpp  1.7.0
(Naïve) C++ implementation of models for regular languages
nfa.cpp
Go to the documentation of this file.
1 #include "nfa.h"
4 using std::valarray;
5 using std::vector;
7 using std::string;
8 using std::make_unique;
9 using std::make_pair;
10 using std::u32string;
11 
12 using std::move;
13 
14 #include <unordered_map>
15 using std::unordered_map;
16 
17 #include <algorithm>
18 using std::find;
19 using std::min;
20 
21 #include <iterator>
22 
23 #include "dfa.h"
24 #include "expression.h"
25 
26 namespace reg {
27 
29 struct nfa::pImpl {
37 
39  pImpl() :
40  equivalent(),
41  accepting(false, 1),
42  alphabet(1, U'\0'),
43  utf8Alphabet(1, ""),
44  epsClosures(1),
45  labels(1),
46  transitions(1, vector<valarray<bool>>(1, valarray<bool>(false, 1))) {}
47 
50  vector<string>& labels, valarray<bool>& acceptingStates) :
51  accepting(move(acceptingStates)),
52  alphabet(move(alphabet)),
53  labels(move(labels)),
54  transitions(move(transitions)) {
55  utf8Alphabet.reserve(this->alphabet.size());
56  for (char32_t symbol : this->alphabet) {
57  if (symbol == U'\0') {
58  utf8Alphabet.push_back("");
59  } else {
60  utf8Alphabet.push_back(converter.to_bytes(symbol));
61  }
62  }
64  }
65 };
66 
68 nfa::nfa() : p(new pImpl) {}
69 
71 nfa::nfa(vector<char32_t>& alphabet, vector<vector<valarray<bool>>>& transitions,
72  vector<string>& labels, valarray<bool>& acceptingStates) :
73  p(new pImpl(alphabet, transitions, labels, acceptingStates)) {}
74 
75 #ifndef DOXYGEN_SHOULD_SKIP_THIS
76 nfa::operator dfa const&() const {
77  if (!p->equivalent) {
78  p->equivalent = std::make_shared<dfa const>(dfa::builder(*this).minimize().normalizeStateNames("").build());
79  }
80  return *p->equivalent;
81 }
82 #endif // DOXYGEN_SHOULD_SKIP_THIS
83 
85 
88 bool nfa::operator==(nfa const& n) const {
89  return static_cast<dfa const&>(*this).operator==(n);
90 }
91 
93 
96 bool nfa::operator!=(nfa const& n) const {
97  return static_cast<dfa const&>(*this).operator!=(n);
98 }
99 
101 
108 valarray<bool> const& nfa::delta(size_t qi, size_t si) const {
109  if (si >= p->alphabet.size()) {
110  throw std::domain_error("There is no symbol with index " + std::to_string(si));
111  }
112  if (qi >= p->labels.size()) {
113  throw std::out_of_range("There is no state with index " + std::to_string(qi));
114  }
115  return p->transitions[qi][si];
116 }
117 
119 
126 valarray<bool> const& nfa::delta(size_t qi, char32_t symbol) const {
127  try {
128  return delta(qi, index_of(p->alphabet, symbol));
129  } catch (std::domain_error e) {
130  throw std::domain_error(u8"δ is not defined for symbol " + converter.to_bytes(symbol));
131  }
132 }
133 
135 valarray<bool> const& nfa::delta(size_t qi, string const& utf8Symbol) const {
136  return delta(qi, converter.from_bytes(utf8Symbol)[0]);
137 }
138 
140 
147 nfa::state_set nfa::delta(string const& q, char32_t symbol) const {
148  try {
149  return decodeSet(delta(index_of(getStates(), q), symbol));
150  } catch (std::out_of_range e) {
151  throw std::out_of_range("There is no state named " + q);
152  }
153 }
154 
156 nfa::state_set nfa::delta(string const& q, string const& utf8Symbol) const {
157  return delta(q, converter.from_bytes(utf8Symbol)[0]);
158 }
159 
161 
168 valarray<bool> nfa::deltaHat(size_t qi, u32string const& word) const {
169  if (qi >= p->labels.size()) {
170  throw std::out_of_range("There is no state with index " + std::to_string(qi));
171  }
172  valarray<bool> qs(p->labels.size());
173  qs[qi] = true;
174  return deltaHat(qs, word);
175 }
176 
178 valarray<bool> nfa::deltaHat(size_t qi, string const& utf8Word) const {
179  return deltaHat(qi, converter.from_bytes(utf8Word));
180 }
181 
183 
190 nfa::state_set nfa::deltaHat(string const& q, u32string const& word) const {
191  try {
192  return decodeSet(deltaHat(index_of(getStates(), q), word));
193  } catch (std::out_of_range e) {
194  throw std::out_of_range("There is no state named " + q);
195  }
196 }
197 
199 nfa::state_set nfa::deltaHat(string const& q, string const& utf8Word) const {
200  return deltaHat(q, converter.from_bytes(utf8Word));
201 }
202 
204 
210 valarray<bool> nfa::deltaHat(valarray<bool> const& qs, u32string const& word) const {
212  for (char32_t symbol : word) {
213  valarray<bool> reached(p->labels.size());
214  size_t qi(0);
215  for (bool qb : ps) {
216  if (qb) {
217  reached |= epsilonClosure(delta(qi, symbol));
218  }
219  qi++;
220  }
221  ps = move(reached);
222  }
223  return ps;
224 }
225 
227 valarray<bool> nfa::deltaHat(valarray<bool> const& qs, string const& utf8Word) const {
228  return deltaHat(qs, converter.from_bytes(utf8Word));
229 }
230 
232 
238 nfa::state_set nfa::deltaHat(nfa::state_set const& qs, u32string const& word) const {
239  return decodeSet(deltaHat(encodeSet(qs), word));
240 }
241 
243 nfa::state_set nfa::deltaHat(nfa::state_set const& qs, string const& utf8Word) const {
244  return deltaHat(qs, converter.from_bytes(utf8Word));
245 }
246 
249  return findShortestWord(*this);
250 }
251 
253 string nfa::getShortestUtf8Word() const {
254  return findShortestUtf8Word(*this);
255 }
256 
258 
263 valarray<bool> const& nfa::epsilonClosure(size_t qi) const {
264  if (qi >= p->labels.size()) {
265  throw std::out_of_range("There is no state with index " + std::to_string(qi));
266  }
267  if (p->epsClosures[qi].size() == 0) {
268  p->epsClosures[qi].resize(p->labels.size());
269  p->epsClosures[qi][qi] = true;
270  int growth(1);
271  while (growth > 0) {
272  growth = 0;
273  valarray<bool> old(p->epsClosures[qi]);
274  size_t qqi(0);
275  for (bool qqb : old) {
276  if (qqb) {
277  size_t pi(0);
278  for (bool pb : p->transitions[qqi][0]) {
279  if (pb) {
280  if (!p->epsClosures[qi][pi]) {
281  growth++;
282  p->epsClosures[qi][pi] = true;
283  }
284  }
285  pi++;
286  } // going through the true bools in transitions[qqi][0]
287  }
288  qqi++;
289  } // going through the true bools in old
290  }
291  }
292  return p->epsClosures[qi];
293 }
294 
296 
301 nfa::state_set nfa::epsilonClosure(string const& q) const {
302  try {
304  } catch (std::out_of_range e) {
305  throw std::out_of_range("There is no state named " + q);
306  }
307 }
308 
310 
315  valarray<bool> closure(p->labels.size());
316  size_t range = min(qs.size(), getNumberOfStates());
317  for (size_t q = 0; q < range; q++) {
318  if (qs[q]) {
319  closure |= epsilonClosure(q);
320  }
321  }
322  return closure;
323 }
324 
326 
331  return decodeSet(epsilonClosure(encodeSet(qs)));
332 }
333 
335 string const& nfa::getLabelOf(size_t qi) const {
336  return p->labels.at(qi);
337 }
338 
340 
344 string nfa::to_string(valarray<bool> const& qs) const {
345  string label;
346  size_t range = min(qs.size(), getNumberOfStates());
347  for (size_t q = 0; q < range; q++) {
348  if (qs[q]) {
349  label = label.append(1, label.length() == 0 ? '{' : ',');
350  label = label.append(p->labels.at(q));
351  }
352  }
353  if (label.length() == 0) {
354  return string("{}");
355  } else {
356  return label.append(1, '}');
357  }
358 }
359 
361 
365 string nfa::to_string(nfa::state_set const& qs) const {
366  return getLabelOf(encodeSet(qs));
367 }
368 
370 string nfa::getLabelOf(valarray<bool> const& qs) const {
371  return to_string(qs);
372 }
373 
375 string nfa::getLabelOf(nfa::state_set const& qs) const {
376  return to_string(qs);
377 }
378 
380 
385  nfa::state_set states;
386  states.reserve(getNumberOfStates());
387  size_t range = min(qs.size(), getNumberOfStates());
388  for (size_t q = 0; q < range; q++) {
389  if (qs[q]) {
390  states.emplace(getStates()[q]);
391  }
392  }
393  return states;
394 }
395 
397 
403  for (size_t qi = 0; qi < getNumberOfStates(); qi++) {
404  states[qi] = qs.count(getStates()[qi]);
405  }
406  return states;
407 }
408 
410 
413 string const& nfa::getInitialState() const {
414  return getStates()[0];
415 }
416 
418 
422  return p->labels;
423 }
424 
426 
430  return nfa::state_set(getStates().begin(), getStates().end());
431 }
432 
434 
438  return valarray<bool>(true, getNumberOfStates());
439 }
440 
442 
446  return p->alphabet;
447 }
448 
450 
454  return p->utf8Alphabet;
455 }
456 
458 size_t nfa::getNumberOfStates() const {
459  return getStates().size();
460 }
461 
463 size_t nfa::getNumberOfSymbols() const {
464  return getAlphabet().size();
465 }
466 
468 
473 bool nfa::isAccepting(size_t qi) const {
474  if (qi >= p->labels.size()) {
475  throw std::out_of_range("There is no state with index " + std::to_string(qi));
476  }
477  return p->accepting[qi];
478 }
479 
481 
486 bool nfa::isAccepting(string const& q) const {
487  try {
488  return isAccepting(index_of(getStates(), q));
489  } catch (std::out_of_range e) {
490  throw std::out_of_range("There is no state named " + q);
491  }
492 }
493 
495 
499 bool nfa::isAccepting(valarray<bool> const& qs) const {
500  size_t range = min(qs.size(), getNumberOfStates());
501  for (size_t q = 0; q < range; q++) {
502  if (qs[q] && p->accepting[q]) {
503  return true;
504  }
505  }
506  return false;
507 }
508 
510 
514 bool nfa::isAccepting(nfa::state_set const& qs) const {
515  for (size_t qi = 0; qi < getNumberOfStates(); qi++) {
516  if (p->accepting[qi] && qs.count(getStates()[qi])) {
517  return true;
518  }
519  }
520  return false;
521 }
522 
524 bool nfa::hasAccepting(valarray<bool> const& qs) const {
525  return isAccepting(qs);
526 }
527 
529 bool nfa::hasAccepting(nfa::state_set const& qs) const {
530  return isAccepting(qs);
531 }
532 
534 
540  auto const& p = n.p;
541  if (p->accepting[0]) {
542  return U"";
543  }
544  unordered_map<size_t, u32string> shortestWords(p->labels.size());
545  size_t oldSize = 0;
546  shortestWords.emplace(0, U"");
547  while (shortestWords.size() > oldSize) {
548  oldSize = shortestWords.size();
549  for (auto const& stateWord : shortestWords) {
550  for (auto symbol : p->alphabet) {
551  valarray<bool> reached = n.deltaHat(stateWord.first, u32string(!!symbol, symbol));
552  u32string shWord = stateWord.second + u32string(!!symbol, symbol);
553  for (size_t q = 0; q < reached.size(); q++) { if (reached[q]) {
554  if (p->accepting[q]) {
555  return shWord;
556  }
557  if (shortestWords.find(q) == shortestWords.end()) {
558  shortestWords.emplace(q, shWord);
559  }
560  }}
561  }
562  }
563  }
564  throw std::logic_error("This NFA doesn't accept any words!");
565 }
566 
568 string findShortestUtf8Word(nfa const& n) {
569  return converter.to_bytes(findShortestWord(n));
570 }
571 
573 
580 nfa::builder nfa::unite(nfa const& n1, nfa const& n2) {
581  return builder(n1).unite(n2);
582 }
583 
585 
592 nfa::builder nfa::intersect(nfa const& n1, nfa const& n2) {
593  return builder(n1).intersect(n2);
594 }
595 
597 
604 nfa::builder nfa::subtract(nfa const& n1, nfa const& n2) {
605  dfa::builder b2(n2);
606  for (auto symbol : n1.getAlphabet()) {
607  b2.addSymbol(symbol);
608  }
609  b2.complement().minimize();
610  return builder(n1).intersect(builder(b2.build()).build());
611 }
612 
614 
622  return nfa::builder(dfa::builder(n).complement().minimize().build());
623 }
624 
626 nfa::nfa(nfa const& n) : p(new pImpl(*(n.p))) {}
627 
629 
630 nfa::nfa(nfa&& n) : p(new pImpl) {p.swap(n.p);}
631 
633 nfa& nfa::operator=(nfa const& n) {
634  if (this != &n) {
635  p.reset(new pImpl(*(n.p)));
636  }
637  return *this;
638 }
639 
641 
643  if (this != &n) {
644  p.reset(new pImpl);
645  p.swap(n.p);
646  }
647  return *this;
648 }
649 
650 nfa::~nfa() = default;
651 
654 
657  string initial;
662 
664  pImpl() = default;
666 
669  string& initial,
673  ) :
674  initial(move(initial)),
676  alphabet(move(alphabet)),
677  transitions(move(transitions)) {}
678 };
679 
682 
684 nfa::builder::builder(nfa const& nfa) : p(new pImpl) {
686  for (char32_t symbol : nfa.getAlphabet()) {
687  addSymbol(symbol);
688  }
689  for (size_t qi = 0; qi < nfa.p->labels.size(); ++qi) {
690  string const& qLabel = nfa.getLabelOf(qi);
691  for (char32_t symbol : p->alphabet) {
692  valarray<bool> ps = nfa.delta(qi, symbol);
693  size_t pi(0);
694  for (bool pb : ps) {
695  if (pb) {
696  addTransition(qLabel, nfa.getLabelOf(pi), symbol);
697  }
698  pi++;
699  }
700  }
701  if (nfa.isAccepting(qi)) {
702  setAccepting(qLabel, true);
703  }
704  }
705 }
706 
708 
710  string initial(dfa.getLabelOf(0));
711  unordered_set<string> acceptingStates;
712  unordered_set<char32_t> alphabet(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
713  Ntransitionmap transitions;
714  transitions[initial];
715  for (size_t q(0); q < dfa.getNumberOfStates(); q++) {
716  for (char32_t symbol : alphabet) {
717  transitions[dfa.getLabelOf(q)][symbol].insert(dfa.getLabelOf(dfa.delta(q, symbol)));
718  }
719  if (dfa.isAccepting(q)) {
720  acceptingStates.insert(dfa.getLabelOf(q));
721  }
722  }
723  p = make_unique<pImpl>(initial, acceptingStates, alphabet, transitions);
724 }
725 
727 nfa::builder::builder(builder const& b) : p(new pImpl(*(b.p))) {}
728 
730 
731 nfa::builder::builder(builder&& b) : p(new pImpl) {p.swap(b.p);}
732 
735  if (this != &b) {
736  p.reset(new pImpl(*(b.p)));
737  }
738  return *this;
739 }
740 
742 
744  if (this != &b) {
745  p.reset(new pImpl);
746  p.swap(b.p);
747  }
748  return *this;
749 }
750 
752 nfa::builder::operator nfa() {
753  return build();
754 }
755 
757 
762  p->alphabet.insert(symbol);
763  return *this;
764 }
765 
767 nfa::builder& nfa::builder::addSymbol(string const& utf8Symbol) {
768  return addSymbol(converter.from_bytes(utf8Symbol)[0]);
769 }
770 
772 
777 nfa::builder& nfa::builder::setAccepting(string const& state, bool accept) {
778  if (p->transitions.empty()) {
779  p->initial = state;
780  }
781  p->transitions[state];
782  if (accept) {
783  p->acceptingStates.insert(state);
784  } else {
785  p->acceptingStates.erase(state);
786  }
787  return *this;
788 }
789 
791 
796  p->initial = state;
797  p->transitions[state];
798  return *this;
799 }
800 
802 
808 nfa::builder& nfa::builder::addTransition(string const& from, string const& to, char32_t symbol) {
809  if (p->transitions.empty()) {
810  p->initial = from;
811  }
812  p->transitions[to];
813  p->transitions[from][symbol].insert(to);
814  addSymbol(symbol);
815  return *this;
816 }
817 
819 nfa::builder& nfa::builder::addTransition(string const& from, string const& to, string const& utf8Symbol) {
820  return addTransition(from, to, converter.from_bytes(utf8Symbol)[0]);
821 }
822 
825  if (!p->transitions.empty()) {
826  vector<string> stateNames;
827  stateNames.reserve(p->transitions.size());
828  stateNames.push_back(p->initial);
829  for (auto const& fromTo : p->transitions) {
830  if (fromTo.first != p->initial) {
831  stateNames.push_back(fromTo.first);
832  }
833  }
834  Ntransitionmap newTr(p->transitions.size());
835  unordered_set<string> newAcc(p->acceptingStates.size());
836  for (size_t q = 0; q < stateNames.size(); q++) {
837  auto const& vias = p->transitions[stateNames[q]];
838  unordered_map<char32_t,unordered_set<string>> newVias(vias.size());
839  for (auto const& viaTo : vias) {
840  unordered_set<string> newTos(viaTo.second.size());
841  for (size_t p = 0; p < stateNames.size(); p++) {
842  if (viaTo.second.find(stateNames[p]) != viaTo.second.cend()) {
843  newTos.insert(prefix + std::to_string(p));
844  }
845  }
846  newVias.insert(make_pair(viaTo.first, newTos));
847  }
848  newTr.insert(make_pair(prefix + std::to_string(q), newVias));
849  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
850  newAcc.insert(prefix + std::to_string(q));
851  }
852  }
853  p->initial = string(string(prefix).append("0"));
854  p->transitions = newTr;
855  p->acceptingStates = newAcc;
856  }
857  return *this;
858 }
859 
861 
869  string newInitial("q0");
870  if (!p->transitions.empty()) {
871  Ntransitionmap tempTr(p->transitions.size());
872  for (auto const& fromVia : p->transitions) {
873  unordered_map<char32_t,unordered_set<string>> tempVia(fromVia.second.size());
874  for (auto const& viaTo : fromVia.second) {
875  unordered_set<string> tempTo(viaTo.second.size());
876  for (auto const& to : viaTo.second) {
877  tempTo.insert(string(to.length() + 1, '1').replace(1, string::npos, to));
878  }
879  tempVia.insert(make_pair(viaTo.first, tempTo));
880  }
881  tempTr.insert(make_pair(string(fromVia.first.length() + 1, '1').replace(1, string::npos, fromVia.first), tempVia));
882  }
883  p->transitions = tempTr;
884  unordered_set<string> tempAcc(p->acceptingStates.size());
885  for (auto const& acc : p->acceptingStates) {
886  tempAcc.insert(string(acc.length() + 1, '1').replace(1, string::npos, acc));
887  }
888  p->acceptingStates = tempAcc;
889  p->initial = string(p->initial.length() + 1, '1').replace(1, string::npos, p->initial);
890  addTransition(newInitial, p->initial, U'\0');
891  }
892  makeInitial(newInitial);
893  auto & oAlphabet = other.getAlphabet();
894  p->alphabet.insert(oAlphabet.cbegin(), oAlphabet.cend());
895  for (size_t q = 0; q < other.getNumberOfStates(); q++) {
896  auto & qLabel = other.getLabelOf(q);
897  string newQLabel = string(qLabel.length() + 1, '2').replace(1, string::npos, qLabel);
898  unordered_map<char32_t,unordered_set<string>> tempVia(oAlphabet.size() + 1);
899  for (char32_t symbol : oAlphabet) {
900  valarray<bool> const& to = other.delta(q, symbol);
902  for (size_t p = 0; p < other.getNumberOfStates(); p++) { if (to[p]) {
903  auto & pLabel = other.getLabelOf(p);
904  tempTo.insert(string(pLabel.length() + 1, '2').replace(1, string::npos, pLabel));
905  }}
906  tempVia.insert(make_pair(symbol, tempTo));
907  }
908  p->transitions.insert(make_pair(newQLabel, tempVia));
909  if (other.isAccepting(q)) {
910  p->acceptingStates.insert(newQLabel);
911  }
912  if (q == 0) {
913  addTransition(newInitial, newQLabel, U'\0');
914  }
915  }
916  return *this;
917 }
918 
920 
927  if (!p->transitions.empty()) {
928  vector<string> stateNames;
929  stateNames.reserve(p->transitions.size());
930  stateNames.push_back(p->initial);
931  for (auto const& fromTo : p->transitions) {
932  if (fromTo.first != p->initial) {
933  stateNames.push_back(fromTo.first);
934  }
935  }
936  auto const& oAlph = other.getAlphabet();
937  size_t commonSymbols = 0;
938  for (char32_t symbol : p->alphabet) {
939  if (index_of(oAlph, symbol) < oAlph.size()) {
940  commonSymbols++;
941  } else {
942  for (auto & fromVia : p->transitions) {
943  fromVia.second.erase(symbol);
944  }
945  }
946  }
947  p->alphabet.insert(oAlph.begin(), oAlph.end());
948  Ntransitionmap newTr(stateNames.size() * other.getNumberOfStates());
949  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
950  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
951  for (size_t q = 0; q < stateNames.size(); q++) {
952  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
953  string potentialName = stateNames[q] + other.getLabelOf(qq);
954  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
955  potentialName.append("_");
956  }
957  auto nameIt = newTr.emplace(potentialName, commonSymbols);
958  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
959  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.isAccepting(qq)) {
960  newAcc.insert(potentialName);
961  }
962  }
963  p->transitions[stateNames[q]][U'\0'].insert(stateNames[q]);
964  // Needed due to the equivalence of standing still to “taking” an ε-transition.
965  }
966  for (size_t q = 0; q < stateNames.size(); q++) {
967  auto const& qLabel = stateNames[q];
968  auto const& viaTos = p->transitions[qLabel];
969  for (auto const& viaTo : viaTos) {
970  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
971  valarray<bool> const& reached = other.delta(qq, viaTo.first);
972  for (auto const& to : viaTo.second) {
973  size_t p = index_of(stateNames, to);
974  for (size_t pp = 0; pp < reached.size(); pp++) {
975  if (reached[pp] || (pp == qq && viaTo.first == U'\0')) {
976  // Needed due to the equivalence of standing still to “taking” an ε-transition.
977  newTr[*(pairToName[q][qq])][viaTo.first].insert(*(pairToName[p][pp]));
978  }
979  }
980  }
981  }
982  }
983  }
984  for (auto & fromVia : newTr) {
985  auto const& from = fromVia.first;
986  auto to = fromVia.second.find(U'\0');
987  to->second.erase(from);
988  if (to->second.empty()) {
989  fromVia.second.erase(to);
990  }
991  }
992  p->transitions = newTr;
993  p->acceptingStates = newAcc;
994  p->initial = *(pairToName[0][0]);
995  }
996  return *this;
997 }
998 
1000 
1004  p->transitions[p->initial];
1005  p->alphabet.insert(U'\0');
1006  vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
1007  std::sort(alphabetV.begin(), alphabetV.end());
1008  vector<string> labelV(p->transitions.size());
1009  labelV[0] = p->initial;
1010  valarray<bool> acceptingV(p->transitions.size());
1011  acceptingV[0] = (p->acceptingStates.find(p->initial) != p->acceptingStates.end());
1012  size_t i(1);
1013  for (auto entry : p->transitions) {
1014  if (entry.first == p->initial) {
1015  continue;
1016  }
1017  labelV[i] = entry.first;
1018  acceptingV[i++] = (
1019  p->acceptingStates.find(entry.first) != p->acceptingStates.end()
1020  );
1021  }
1022  vector<vector<valarray<bool>>> transitionV(
1023  p->transitions.size(),
1025  p->alphabet.size()+1,
1026  valarray<bool>(labelV.size())
1027  )
1028  );
1029  unordered_set<string> emptySet;
1030  size_t qi(0);
1031  for (auto const& q : labelV) {
1032  unordered_map<char32_t,unordered_set<string>> const& fromQ = p->transitions[q];
1033  size_t si(0);
1034  for (char32_t symbol : alphabetV) {
1035  auto to = fromQ.find(symbol);
1036  unordered_set<string> const& viaSymbol(to != fromQ.end() ? to->second : emptySet);
1037  i = 0;
1038  for (auto const& p : labelV) {
1039  transitionV[qi][si][i++] = (viaSymbol.count(p) != 0);
1040  }
1041  si++;
1042  }
1043  qi++;
1044  }
1045  return {alphabetV, transitionV, labelV, acceptingV};
1046 }
1047 
1048 nfa::builder::~builder() = default;
1049 } // namespace reg
Ntransitionmap transitions
Transition function (state × symbol → set of states) of the prospective NFA.
Definition: nfa.cpp:660
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:391
nfa build()
Builds the NFA, as defined by previous operations.
Definition: nfa.cpp:1003
static nfa::builder complement(nfa const &n)
Creates a builder for an NFA accepting the complement of the language of an NFA.
Definition: nfa.cpp:621
std::string getShortestUtf8Word() const
Definition: nfa.cpp:253
size_t getNumberOfSymbols() const
Definition: nfa.cpp:463
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:23
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:445
std::string const & getLabelOf(size_t q) const
Definition: nfa.cpp:335
Represents deterministic finite automata.
Definition: dfa.h:21
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this NFA's set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:453
vector< vector< valarray< bool > > > transitions
Stores the transition function as a table viz state index × symbol index → list of bools with true a...
Definition: nfa.cpp:36
Constructs NFAs step by step.
Definition: nfa.h:91
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: utils.cpp:4
bool operator==(nfa const &n) const
Checks whether this NFA describes the same regular language as another object.
Definition: nfa.cpp:88
pImpl()
Constructs private implementation object for an NFA accepting the empty language ∅.
Definition: nfa.cpp:39
vector< string > labels
Stores the names of states.
Definition: nfa.cpp:35
static nfa::builder intersect(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the intersection of the languages of two NFAs.
Definition: nfa.cpp:592
std::string to_string(std::valarray< bool > const &qs) const
Puts a name to a set of indices.
Definition: nfa.cpp:344
builder & unite(nfa const &other)
Makes the prospective NFA also accept every word of another NFA&#39;s language.
Definition: nfa.cpp:868
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
Definition: dfa.cpp:239
nfa & operator=(nfa const &n)
Copy-assigns this NFA by copying another one's private implementation object.
Definition: nfa.cpp:633
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:363
string initial
Name of the prospective NFA&#39;s initial state.
Definition: nfa.cpp:657
std::valarray< bool > encodeSet(state_set const &qs) const
Translates a set of states from set to bool representation.
Definition: nfa.cpp:401
bool operator!=(nfa const &n) const
Checks whether this NFA describes a different regular language than another object.
Definition: nfa.cpp:96
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective NFA's alphabet.
Definition: nfa.cpp:761
state_set getStatesSet() const
Fetches this NFA's set of states as a set of (references to) strings.
Definition: nfa.cpp:429
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective NFA.
Definition: nfa.cpp:777
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA&#39;s state names to consecutive numbers, prefixed with a given string...
Definition: nfa.cpp:824
size_t index_of(C const &container, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: utils.h:19
Contains the reg::nfa class definition.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: nfa.cpp:31
Contains the reg::expression class defintion.
vector< char32_t > alphabet
Represents the set of processable symbols.
Definition: nfa.cpp:32
state_set decodeSet(std::valarray< bool > const &qs) const
Translates a set of states from bool to set representation.
Definition: nfa.cpp:384
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:473
std::u32string getShortestWord() const
Definition: nfa.cpp:248
static nfa::builder unite(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the union of the languages of two NFAs.
Definition: nfa.cpp:580
builder & complement()
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: dfa.cpp:1007
Private implementation details of NFAs.
Definition: nfa.cpp:29
std::unordered_set< std::reference_wrapper< std::string const >, hash_reference_string_const, equal_to_reference_string_const > state_set
Nicer name for sets of names of states. Should store references to existing state names...
Definition: nfa.h:26
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective NFA.
Definition: nfa.cpp:659
Constructs DFAs step by step.
Definition: dfa.h:67
vector< valarray< bool > > epsClosures
Cache for every state&#39;s ε-closures.
Definition: nfa.cpp:34
size_t getNumberOfStates() const
Definition: dfa.cpp:376
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective NFA.
Definition: nfa.cpp:795
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:33
builder & intersect(nfa const &other)
Makes the prospective NFA accept only words accepted also by another NFA.
Definition: nfa.cpp:926
Private implementation details of NFA builders.
Definition: nfa.cpp:656
std::shared_ptr< dfa const > equivalent
Stores a minimal DFA accepting the same language as this object&#39;s NFA.
Definition: nfa.cpp:30
size_t getNumberOfStates() const
Definition: nfa.cpp:458
static nfa::builder subtract(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the set difference of the languages of two NFAs...
Definition: nfa.cpp:604
pImpl(string &initial, unordered_set< string > &acceptingStates, unordered_set< char32_t > &alphabet, Ntransitionmap &transitions)
Constructs private implementation object with provided members.
Definition: nfa.cpp:668
friend std::u32string findShortestWord(nfa const &n)
Searches the shortest UTF-32-encoded word accepted by a given NFA.
Definition: nfa.cpp:539
std::valarray< bool > getStatesBools() const
Fetches this NFA's set of states encoded as an array of bools.
Definition: nfa.cpp:437
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: nfa.cpp:168
pImpl(vector< char32_t > &alphabet, vector< vector< valarray< bool >>> &transitions, vector< string > &labels, valarray< bool > &acceptingStates)
Constructs private implementation object with provided members and empty ε-closure cache...
Definition: nfa.cpp:49
bool hasAccepting(std::valarray< bool > const &qs) const
Definition: nfa.cpp:524
Where this library lives.
Definition: dfa.cpp:51
nfa()
Constructs an NFA accepting the empty language ∅.
Definition: nfa.cpp:68
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:445
builder & operator=(builder const &b)
Copy-assigns a builder by copying another one's private implementation object.
Definition: nfa.cpp:734
std::string const & getInitialState() const
Names this NFA's initial state.
Definition: nfa.cpp:413
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:418
unordered_set< string > acceptingStates
Set of names of the prospective NFA&#39;s accept states.
Definition: nfa.cpp:658
builder & addTransition(std::string const &from, std::string const &to, char32_t symbol)
Adds a transition for the prospective NFA.
Definition: nfa.cpp:808
std::valarray< bool > const & delta(size_t qi, size_t si) const
Computes this NFA's transition function for a state index and a symbol index.
Definition: nfa.cpp:108
dfa build()
Builds the DFA, as defined by previous operations, including completion.
Definition: dfa.cpp:1052
std::string const & getLabelOf(size_t qi) const
Definition: dfa.cpp:339
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
Definition: nfa.cpp:263
std::vector< std::string > const & getStates() const
Fetches this NFA's set of states in order of internal representation.
Definition: nfa.cpp:421
pImpl()=default
Constructs empty private implementation object.
Contains the reg::dfa class definition.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
Definition: dfa.cpp:675
builder()
Constructs a blank builder object.
Definition: nfa.cpp:681
friend std::string findShortestUtf8Word(nfa const &n)
Same as above for a UTF-8-encoded word.
Definition: nfa.cpp:568