reglibcpp  1.6.0
(Naïve) C++ implementation of models for regular languages
nfa.cpp
Go to the documentation of this file.
1 #include "nfa.h"
4 using std::valarray;
5 using std::vector;
7 using std::string;
8 using std::make_unique;
9 using std::make_pair;
10 using std::u32string;
11 
12 using std::move;
13 
14 #include <unordered_map>
15 using std::unordered_map;
16 
17 #include <algorithm>
18 using std::find;
19 using std::min;
20 
21 #include <iterator>
22 
23 #include "dfa.h"
24 #include "expression.h"
25 
26 namespace reg {
27 
29 struct nfa::pImpl {
37 
39  pImpl() :
40  equivalent(),
41  accepting(false, 1),
42  alphabet(1, U'\0'),
43  utf8Alphabet(1, ""),
44  epsClosures(1),
45  labels(1),
46  transitions(1, vector<valarray<bool>>(1, valarray<bool>(false, 1))) {}
47 
50  vector<string>& labels, valarray<bool>& acceptingStates) :
51  accepting(move(acceptingStates)),
52  alphabet(move(alphabet)),
53  labels(move(labels)),
54  transitions(move(transitions)) {
55  utf8Alphabet.reserve(this->alphabet.size());
56  for (char32_t symbol : this->alphabet) {
57  if (symbol == U'\0') {
58  utf8Alphabet.push_back("");
59  } else {
60  utf8Alphabet.push_back(converter.to_bytes(symbol));
61  }
62  }
64  }
65 
67 
70  dfa const& getEquivalentDfa(nfa const* owner) {
71  if (!equivalent) {
72  equivalent.reset(new dfa(dfa::builder(*owner).minimize()));
73  }
74  return *equivalent;
75  }
76 };
77 
79 nfa::nfa() : p(new pImpl) {}
80 
82 nfa::nfa(vector<char32_t>& alphabet, vector<vector<valarray<bool>>>& transitions,
83  vector<string>& labels, valarray<bool>& acceptingStates) :
84  p(new pImpl(alphabet, transitions, labels, acceptingStates)) {}
85 
87 bool nfa::operator==(nfa const& n) const {
88  return p->getEquivalentDfa(this) == n.p->getEquivalentDfa(&n);
89 }
90 
92 bool nfa::operator!=(nfa const& n) const {
93  return p->getEquivalentDfa(this) != n.p->getEquivalentDfa(&n);
94 }
95 
97 
104 valarray<bool> const& nfa::delta(size_t qi, size_t si) const {
105  if (si >= p->alphabet.size()) {
106  throw std::domain_error("There is no symbol with index " + std::to_string(si));
107  }
108  if (qi >= p->labels.size()) {
109  throw std::out_of_range("There is no state with index " + std::to_string(qi));
110  }
111  return p->transitions[qi][si];
112 }
113 
115 
122 valarray<bool> const& nfa::delta(size_t qi, char32_t symbol) const {
123  try {
124  return delta(qi, index_of(p->alphabet, symbol));
125  } catch (std::domain_error e) {
126  throw std::domain_error(u8"δ is not defined for symbol " + converter.to_bytes(symbol));
127  }
128 }
129 
131 valarray<bool> const& nfa::delta(size_t qi, string const& utf8Symbol) const {
132  return delta(qi, converter.from_bytes(utf8Symbol)[0]);
133 }
134 
136 
143 nfa::state_set nfa::delta(string const& q, char32_t symbol) const {
144  try {
145  return decodeSet(delta(index_of(getStates(), q), symbol));
146  } catch (std::out_of_range e) {
147  throw std::out_of_range("There is no state named " + q);
148  }
149 }
150 
152 nfa::state_set nfa::delta(string const& q, string const& utf8Symbol) const {
153  return delta(q, converter.from_bytes(utf8Symbol)[0]);
154 }
155 
157 
164 valarray<bool> nfa::deltaHat(size_t qi, u32string const& word) const {
165  if (qi >= p->labels.size()) {
166  throw std::out_of_range("There is no state with index " + std::to_string(qi));
167  }
168  valarray<bool> qs(p->labels.size());
169  qs[qi] = true;
170  return deltaHat(qs, word);
171 }
172 
174 valarray<bool> nfa::deltaHat(size_t qi, string const& utf8Word) const {
175  return deltaHat(qi, converter.from_bytes(utf8Word));
176 }
177 
179 
186 nfa::state_set nfa::deltaHat(string const& q, u32string const& word) const {
187  try {
188  return decodeSet(deltaHat(index_of(getStates(), q), word));
189  } catch (std::out_of_range e) {
190  throw std::out_of_range("There is no state named " + q);
191  }
192 }
193 
195 nfa::state_set nfa::deltaHat(string const& q, string const& utf8Word) const {
196  return deltaHat(q, converter.from_bytes(utf8Word));
197 }
198 
200 
206 valarray<bool> nfa::deltaHat(valarray<bool> const& qs, u32string const& word) const {
208  for (char32_t symbol : word) {
209  valarray<bool> reached(p->labels.size());
210  size_t qi(0);
211  for (bool qb : ps) {
212  if (qb) {
213  reached |= epsilonClosure(delta(qi, symbol));
214  }
215  qi++;
216  }
217  ps = move(reached);
218  }
219  return ps;
220 }
221 
223 valarray<bool> nfa::deltaHat(valarray<bool> const& qs, string const& utf8Word) const {
224  return deltaHat(qs, converter.from_bytes(utf8Word));
225 }
226 
228 
234 nfa::state_set nfa::deltaHat(nfa::state_set const& qs, u32string const& word) const {
235  return decodeSet(deltaHat(encodeSet(qs), word));
236 }
237 
239 nfa::state_set nfa::deltaHat(nfa::state_set const& qs, string const& utf8Word) const {
240  return deltaHat(qs, converter.from_bytes(utf8Word));
241 }
242 
245  return findShortestWord(*this);
246 }
247 
249 string nfa::getShortestUtf8Word() const {
250  return findShortestUtf8Word(*this);
251 }
252 
254 
259 valarray<bool> const& nfa::epsilonClosure(size_t qi) const {
260  if (qi >= p->labels.size()) {
261  throw std::out_of_range("There is no state with index " + std::to_string(qi));
262  }
263  if (p->epsClosures[qi].size() == 0) {
264  p->epsClosures[qi].resize(p->labels.size());
265  p->epsClosures[qi][qi] = true;
266  int growth(1);
267  while (growth > 0) {
268  growth = 0;
269  valarray<bool> old(p->epsClosures[qi]);
270  size_t qqi(0);
271  for (bool qqb : old) {
272  if (qqb) {
273  size_t pi(0);
274  for (bool pb : p->transitions[qqi][0]) {
275  if (pb) {
276  if (!p->epsClosures[qi][pi]) {
277  growth++;
278  p->epsClosures[qi][pi] = true;
279  }
280  }
281  pi++;
282  } // going through the true bools in transitions[qqi][0]
283  }
284  qqi++;
285  } // going through the true bools in old
286  }
287  }
288  return p->epsClosures[qi];
289 }
290 
292 
297 nfa::state_set nfa::epsilonClosure(string const& q) const {
298  try {
300  } catch (std::out_of_range e) {
301  throw std::out_of_range("There is no state named " + q);
302  }
303 }
304 
306 
311  valarray<bool> closure(p->labels.size());
312  size_t range = min(qs.size(), getNumberOfStates());
313  for (size_t q = 0; q < range; q++) {
314  if (qs[q]) {
315  closure |= epsilonClosure(q);
316  }
317  }
318  return closure;
319 }
320 
322 
327  return decodeSet(epsilonClosure(encodeSet(qs)));
328 }
329 
331 string const& nfa::getLabelOf(size_t qi) const {
332  return p->labels.at(qi);
333 }
334 
336 
340 string nfa::to_string(valarray<bool> const& qs) const {
341  string label;
342  size_t range = min(qs.size(), getNumberOfStates());
343  for (size_t q = 0; q < range; q++) {
344  if (qs[q]) {
345  label = label.append(1, label.length() == 0 ? '{' : ',');
346  label = label.append(p->labels.at(q));
347  }
348  }
349  if (label.length() == 0) {
350  return string("{}");
351  } else {
352  return label.append(1, '}');
353  }
354 }
355 
357 
361 string nfa::to_string(nfa::state_set const& qs) const {
362  return getLabelOf(encodeSet(qs));
363 }
364 
366 string nfa::getLabelOf(valarray<bool> const& qs) const {
367  return to_string(qs);
368 }
369 
371 string nfa::getLabelOf(nfa::state_set const& qs) const {
372  return to_string(qs);
373 }
374 
376 
381  state_set states;
382  states.reserve(getNumberOfStates());
383  size_t range = min(qs.size(), getNumberOfStates());
384  for (size_t q = 0; q < range; q++) {
385  if (qs[q]) {
386  states.emplace(getStates()[q]);
387  }
388  }
389  return states;
390 }
391 
393 
399  for (size_t qi = 0; qi < getNumberOfStates(); qi++) {
400  states[qi] = qs.count(getStates()[qi]);
401  }
402  return states;
403 }
404 
406 
409 string const& nfa::getInitialState() const {
410  return getStates()[0];
411 }
412 
414 
418  return p->labels;
419 }
420 
422 
426  return state_set(getStates().begin(), getStates().end());
427 }
428 
430 
434  return valarray<bool>(true, getNumberOfStates());
435 }
436 
438 
442  return p->alphabet;
443 }
444 
446 
450  return p->utf8Alphabet;
451 }
452 
454 size_t nfa::getNumberOfStates() const {
455  return getStates().size();
456 }
457 
459 size_t nfa::getNumberOfSymbols() const {
460  return getAlphabet().size();
461 }
462 
464 
469 bool nfa::isAccepting(size_t qi) const {
470  if (qi >= p->labels.size()) {
471  throw std::out_of_range("There is no state with index " + std::to_string(qi));
472  }
473  return p->accepting[qi];
474 }
475 
477 
482 bool nfa::isAccepting(string const& q) const {
483  try {
484  return isAccepting(index_of(getStates(), q));
485  } catch (std::out_of_range e) {
486  throw std::out_of_range("There is no state named " + q);
487  }
488 }
489 
491 
495 bool nfa::isAccepting(valarray<bool> const& qs) const {
496  size_t range = min(qs.size(), getNumberOfStates());
497  for (size_t q = 0; q < range; q++) {
498  if (qs[q] && p->accepting[q]) {
499  return true;
500  }
501  }
502  return false;
503 }
504 
506 
510 bool nfa::isAccepting(nfa::state_set const& qs) const {
511  for (size_t qi = 0; qi < getNumberOfStates(); qi++) {
512  if (p->accepting[qi] && qs.count(getStates()[qi])) {
513  return true;
514  }
515  }
516  return false;
517 }
518 
520 bool nfa::hasAccepting(valarray<bool> const& qs) const {
521  return isAccepting(qs);
522 }
523 
525 bool nfa::hasAccepting(nfa::state_set const& qs) const {
526  return isAccepting(qs);
527 }
528 
530 
536  auto const& p = n.p;
537  if (p->accepting[0]) {
538  return U"";
539  }
540  unordered_map<size_t, u32string> shortestWords(p->labels.size());
541  size_t oldSize = 0;
542  shortestWords.emplace(0, U"");
543  while (shortestWords.size() > oldSize) {
544  oldSize = shortestWords.size();
545  for (auto const& stateWord : shortestWords) {
546  for (auto symbol : p->alphabet) {
547  valarray<bool> reached = n.deltaHat(stateWord.first, u32string(!!symbol, symbol));
548  u32string shWord = stateWord.second + u32string(!!symbol, symbol);
549  for (size_t q = 0; q < reached.size(); q++) { if (reached[q]) {
550  if (p->accepting[q]) {
551  return shWord;
552  }
553  if (shortestWords.find(q) == shortestWords.end()) {
554  shortestWords.emplace(q, shWord);
555  }
556  }}
557  }
558  }
559  }
560  throw std::logic_error("This NFA doesn't accept any words!");
561 }
562 
564 string findShortestUtf8Word(nfa const& n) {
565  return converter.to_bytes(findShortestWord(n));
566 }
567 
569 
576 nfa::builder nfa::unite(nfa const& n1, nfa const& n2) {
577  return builder(n1).unite(n2);
578 }
579 
581 
588 nfa::builder nfa::intersect(nfa const& n1, nfa const& n2) {
589  return builder(n1).intersect(n2);
590 }
591 
593 
600 nfa::builder nfa::subtract(nfa const& n1, nfa const& n2) {
601  dfa::builder b2(n2);
602  for (auto symbol : n1.getAlphabet()) {
603  b2.addSymbol(symbol);
604  }
605  b2.complement().minimize();
606  return builder(n1).intersect(builder(b2.build()).build());
607 }
608 
610 
618  return nfa::builder(dfa::builder(n).complement().minimize().build());
619 }
620 
622 nfa::nfa(nfa const& n) : p(new pImpl(*(n.p))) {}
623 
625 
626 nfa::nfa(nfa&& n) : p(new pImpl) {p.swap(n.p);}
627 
629 nfa::nfa(nfa::builder& b) : nfa(b.build()) {}
630 
632 nfa::nfa(dfa const& d) : nfa(builder(d).build()) {}
633 
635 nfa& nfa::operator=(nfa const& n) {
636  if (this != &n) {
637  p.reset(new pImpl(*(n.p)));
638  }
639  return *this;
640 }
641 
643 
645  if (this != &n) {
646  p.reset(new pImpl);
647  p.swap(n.p);
648  }
649  return *this;
650 }
651 
652 nfa::~nfa() = default;
653 
656 
659  string initial;
664 
666  pImpl() = default;
668 
671  string& initial,
675  ) :
676  initial(move(initial)),
678  alphabet(move(alphabet)),
679  transitions(move(transitions)) {}
680 };
681 
684 
686 nfa::builder::builder(nfa const& nfa) : p(new pImpl) {
688  for (char32_t symbol : nfa.getAlphabet()) {
689  addSymbol(symbol);
690  }
691  for (size_t qi = 0; qi < nfa.p->labels.size(); ++qi) {
692  string const& qLabel = nfa.getLabelOf(qi);
693  for (char32_t symbol : p->alphabet) {
694  valarray<bool> ps = nfa.delta(qi, symbol);
695  size_t pi(0);
696  for (bool pb : ps) {
697  if (pb) {
698  addTransition(qLabel, nfa.getLabelOf(pi), symbol);
699  }
700  pi++;
701  }
702  }
703  if (nfa.isAccepting(qi)) {
704  setAccepting(qLabel, true);
705  }
706  }
707 }
708 
710 
712  string initial(dfa.getLabelOf(0));
713  unordered_set<string> acceptingStates;
714  unordered_set<char32_t> alphabet(dfa.getAlphabet().begin(), dfa.getAlphabet().end());
715  Ntransitionmap transitions;
716  transitions[initial];
717  for (size_t q(0); q < dfa.getNumberOfStates(); q++) {
718  for (char32_t symbol : alphabet) {
719  transitions[dfa.getLabelOf(q)][symbol].insert(dfa.getLabelOf(dfa.delta(q, symbol)));
720  }
721  if (dfa.isAccepting(q)) {
722  acceptingStates.insert(dfa.getLabelOf(q));
723  }
724  }
725  p = make_unique<pImpl>(initial, acceptingStates, alphabet, transitions);
726 }
727 
729 nfa::builder::builder(builder const& b) : p(new pImpl(*(b.p))) {}
730 
732 
733 nfa::builder::builder(builder&& b) : p(new pImpl) {p.swap(b.p);}
734 
737  if (this != &b) {
738  p.reset(new pImpl(*(b.p)));
739  }
740  return *this;
741 }
742 
744 
746  if (this != &b) {
747  p.reset(new pImpl);
748  p.swap(b.p);
749  }
750  return *this;
751 }
752 
754 
759  p->alphabet.insert(symbol);
760  return *this;
761 }
762 
764 nfa::builder& nfa::builder::addSymbol(string const& utf8Symbol) {
765  return addSymbol(converter.from_bytes(utf8Symbol)[0]);
766 }
767 
769 
774 nfa::builder& nfa::builder::setAccepting(string const& state, bool accept) {
775  if (p->transitions.empty()) {
776  p->initial = state;
777  }
778  p->transitions[state];
779  if (accept) {
780  p->acceptingStates.insert(state);
781  } else {
782  p->acceptingStates.erase(state);
783  }
784  return *this;
785 }
786 
788 
793  p->initial = state;
794  p->transitions[state];
795  return *this;
796 }
797 
799 
805 nfa::builder& nfa::builder::addTransition(string const& from, string const& to, char32_t symbol) {
806  if (p->transitions.empty()) {
807  p->initial = from;
808  }
809  p->transitions[to];
810  p->transitions[from][symbol].insert(to);
811  addSymbol(symbol);
812  return *this;
813 }
814 
816 nfa::builder& nfa::builder::addTransition(string const& from, string const& to, string const& utf8Symbol) {
817  return addTransition(from, to, converter.from_bytes(utf8Symbol)[0]);
818 }
819 
822  if (!p->transitions.empty()) {
823  vector<string> stateNames;
824  stateNames.reserve(p->transitions.size());
825  stateNames.push_back(p->initial);
826  for (auto const& fromTo : p->transitions) {
827  if (fromTo.first != p->initial) {
828  stateNames.push_back(fromTo.first);
829  }
830  }
831  Ntransitionmap newTr(p->transitions.size());
832  unordered_set<string> newAcc(p->acceptingStates.size());
833  for (size_t q = 0; q < stateNames.size(); q++) {
834  auto const& vias = p->transitions[stateNames[q]];
835  unordered_map<char32_t,unordered_set<string>> newVias(vias.size());
836  for (auto const& viaTo : vias) {
837  unordered_set<string> newTos(viaTo.second.size());
838  for (size_t p = 0; p < stateNames.size(); p++) {
839  if (viaTo.second.find(stateNames[p]) != viaTo.second.cend()) {
840  newTos.insert(prefix + std::to_string(p));
841  }
842  }
843  newVias.insert(make_pair(viaTo.first, newTos));
844  }
845  newTr.insert(make_pair(prefix + std::to_string(q), newVias));
846  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end()) {
847  newAcc.insert(prefix + std::to_string(q));
848  }
849  }
850  p->initial = string(string(prefix).append("0"));
851  p->transitions = newTr;
852  p->acceptingStates = newAcc;
853  }
854  return *this;
855 }
856 
858 
866  string newInitial("q0");
867  if (!p->transitions.empty()) {
868  Ntransitionmap tempTr(p->transitions.size());
869  for (auto const& fromVia : p->transitions) {
870  unordered_map<char32_t,unordered_set<string>> tempVia(fromVia.second.size());
871  for (auto const& viaTo : fromVia.second) {
872  unordered_set<string> tempTo(viaTo.second.size());
873  for (auto const& to : viaTo.second) {
874  tempTo.insert(string(to.length() + 1, '1').replace(1, string::npos, to));
875  }
876  tempVia.insert(make_pair(viaTo.first, tempTo));
877  }
878  tempTr.insert(make_pair(string(fromVia.first.length() + 1, '1').replace(1, string::npos, fromVia.first), tempVia));
879  }
880  p->transitions = tempTr;
881  unordered_set<string> tempAcc(p->acceptingStates.size());
882  for (auto const& acc : p->acceptingStates) {
883  tempAcc.insert(string(acc.length() + 1, '1').replace(1, string::npos, acc));
884  }
885  p->acceptingStates = tempAcc;
886  p->initial = string(p->initial.length() + 1, '1').replace(1, string::npos, p->initial);
887  addTransition(newInitial, p->initial, U'\0');
888  }
889  makeInitial(newInitial);
890  auto & oAlphabet = other.getAlphabet();
891  p->alphabet.insert(oAlphabet.cbegin(), oAlphabet.cend());
892  for (size_t q = 0; q < other.getNumberOfStates(); q++) {
893  auto & qLabel = other.getLabelOf(q);
894  string newQLabel = string(qLabel.length() + 1, '2').replace(1, string::npos, qLabel);
895  unordered_map<char32_t,unordered_set<string>> tempVia(oAlphabet.size() + 1);
896  for (char32_t symbol : oAlphabet) {
897  valarray<bool> const& to = other.delta(q, symbol);
899  for (size_t p = 0; p < other.getNumberOfStates(); p++) { if (to[p]) {
900  auto & pLabel = other.getLabelOf(p);
901  tempTo.insert(string(pLabel.length() + 1, '2').replace(1, string::npos, pLabel));
902  }}
903  tempVia.insert(make_pair(symbol, tempTo));
904  }
905  p->transitions.insert(make_pair(newQLabel, tempVia));
906  if (other.isAccepting(q)) {
907  p->acceptingStates.insert(newQLabel);
908  }
909  if (q == 0) {
910  addTransition(newInitial, newQLabel, U'\0');
911  }
912  }
913  return *this;
914 }
915 
917 
924  if (!p->transitions.empty()) {
925  vector<string> stateNames;
926  stateNames.reserve(p->transitions.size());
927  stateNames.push_back(p->initial);
928  for (auto const& fromTo : p->transitions) {
929  if (fromTo.first != p->initial) {
930  stateNames.push_back(fromTo.first);
931  }
932  }
933  auto const& oAlph = other.getAlphabet();
934  size_t commonSymbols = 0;
935  for (char32_t symbol : p->alphabet) {
936  if (index_of(oAlph, symbol) < oAlph.size()) {
937  commonSymbols++;
938  } else {
939  for (auto & fromVia : p->transitions) {
940  fromVia.second.erase(symbol);
941  }
942  }
943  }
944  p->alphabet.insert(oAlph.begin(), oAlph.end());
945  Ntransitionmap newTr(stateNames.size() * other.getNumberOfStates());
946  unordered_set<string> newAcc(stateNames.size() * other.getNumberOfStates());
947  unordered_map<size_t, unordered_map<size_t, string const*>> pairToName(stateNames.size() * other.getNumberOfStates());
948  for (size_t q = 0; q < stateNames.size(); q++) {
949  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
950  string potentialName = stateNames[q] + other.getLabelOf(qq);
951  for (auto nameIt = newTr.find(potentialName); nameIt != newTr.end(); nameIt = newTr.find(potentialName)) {
952  potentialName.append("_");
953  }
954  auto nameIt = newTr.emplace(potentialName, commonSymbols);
955  pairToName.emplace(q, 1).first->second.emplace(qq, &(nameIt.first->first));
956  if (p->acceptingStates.find(stateNames[q]) != p->acceptingStates.end() && other.isAccepting(qq)) {
957  newAcc.insert(potentialName);
958  }
959  }
960  p->transitions[stateNames[q]][U'\0'].insert(stateNames[q]);
961  // Needed due to the equivalence of standing still to “taking” an ε-transition.
962  }
963  for (size_t q = 0; q < stateNames.size(); q++) {
964  auto const& qLabel = stateNames[q];
965  auto const& viaTos = p->transitions[qLabel];
966  for (auto const& viaTo : viaTos) {
967  for (size_t qq = 0; qq < other.getNumberOfStates(); qq++) {
968  valarray<bool> const& reached = other.delta(qq, viaTo.first);
969  for (auto const& to : viaTo.second) {
970  size_t p = index_of(stateNames, to);
971  for (size_t pp = 0; pp < reached.size(); pp++) {
972  if (reached[pp] || (pp == qq && viaTo.first == U'\0')) {
973  // Needed due to the equivalence of standing still to “taking” an ε-transition.
974  newTr[*(pairToName[q][qq])][viaTo.first].insert(*(pairToName[p][pp]));
975  }
976  }
977  }
978  }
979  }
980  }
981  for (auto & fromVia : newTr) {
982  auto const& from = fromVia.first;
983  auto to = fromVia.second.find(U'\0');
984  to->second.erase(from);
985  if (to->second.empty()) {
986  fromVia.second.erase(to);
987  }
988  }
989  p->transitions = newTr;
990  p->acceptingStates = newAcc;
991  p->initial = *(pairToName[0][0]);
992  }
993  return *this;
994 }
995 
997 
1001  p->transitions[p->initial];
1002  p->alphabet.insert(U'\0');
1003  vector<char32_t> alphabetV(p->alphabet.begin(), p->alphabet.end());
1004  std::sort(alphabetV.begin(), alphabetV.end());
1005  vector<string> labelV(p->transitions.size());
1006  labelV[0] = p->initial;
1007  valarray<bool> acceptingV(p->transitions.size());
1008  acceptingV[0] = (p->acceptingStates.find(p->initial) != p->acceptingStates.end());
1009  size_t i(1);
1010  for (auto entry : p->transitions) {
1011  if (entry.first == p->initial) {
1012  continue;
1013  }
1014  labelV[i] = entry.first;
1015  acceptingV[i++] = (
1016  p->acceptingStates.find(entry.first) != p->acceptingStates.end()
1017  );
1018  }
1019  vector<vector<valarray<bool>>> transitionV(
1020  p->transitions.size(),
1022  p->alphabet.size()+1,
1023  valarray<bool>(labelV.size())
1024  )
1025  );
1026  unordered_set<string> emptySet;
1027  size_t qi(0);
1028  for (auto const& q : labelV) {
1029  unordered_map<char32_t,unordered_set<string>> const& fromQ = p->transitions[q];
1030  size_t si(0);
1031  for (char32_t symbol : alphabetV) {
1032  auto to = fromQ.find(symbol);
1033  unordered_set<string> const& viaSymbol(to != fromQ.end() ? to->second : emptySet);
1034  i = 0;
1035  for (auto const& p : labelV) {
1036  transitionV[qi][si][i++] = (viaSymbol.count(p) != 0);
1037  }
1038  si++;
1039  }
1040  qi++;
1041  }
1042  return {alphabetV, transitionV, labelV, acceptingV};
1043 }
1044 
1045 nfa::builder::~builder() = default;
1046 } // namespace reg
Ntransitionmap transitions
Transition function (state × symbol → set of states) of the prospective NFA.
Definition: nfa.cpp:662
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this DFA.
Definition: dfa.cpp:379
nfa build()
Builds the NFA, as defined by previous operations.
Definition: nfa.cpp:1000
static nfa::builder complement(nfa const &n)
Creates a builder for an NFA accepting the complement of the language of an NFA.
Definition: nfa.cpp:617
std::string getShortestUtf8Word() const
Definition: nfa.cpp:249
size_t getNumberOfSymbols() const
Definition: nfa.cpp:459
Represents nondeterministic finite automata with ε-moves.
Definition: nfa.h:25
std::vector< char32_t > const & getAlphabet() const
Fetches this NFA's set of processable symbols.
Definition: nfa.cpp:441
std::string const & getLabelOf(size_t q) const
Definition: nfa.cpp:331
Represents deterministic finite automata.
Definition: dfa.h:27
std::vector< std::string > const & getUtf8Alphabet() const
Fetches this NFA's set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:449
vector< vector< valarray< bool > > > transitions
Stores the transition function as a table viz state index × symbol index → list of bools with true a...
Definition: nfa.cpp:36
Constructs NFAs step by step.
Definition: nfa.h:94
std::wstring_convert< std::codecvt_utf8< char32_t >, char32_t > converter
Converts between UTF-8-encoded and UTF-32-encoded strings.
Definition: dfa.cpp:1060
builder & minimize()
Convenience method for chaining purge and merge to achieve proper minimization.
Definition: dfa.cpp:844
bool operator==(nfa const &n) const
Tests whether this NFA accepts exactly the same language as another one.
Definition: nfa.cpp:87
pImpl()
Constructs private implementation object for an NFA accepting the empty language ∅.
Definition: nfa.cpp:39
vector< string > labels
Stores the names of states.
Definition: nfa.cpp:35
static nfa::builder intersect(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the intersection of the languages of two NFAs.
Definition: nfa.cpp:588
std::string to_string(std::valarray< bool > const &qs) const
Puts a name to a set of indices.
Definition: nfa.cpp:340
builder & unite(nfa const &other)
Makes the prospective NFA also accept every word of another NFA&#39;s language.
Definition: nfa.cpp:865
size_t delta(size_t qi, size_t si) const
Computes this DFA's transition function for a state index and a symbol index.
Definition: dfa.cpp:227
nfa & operator=(nfa const &n)
Copy-assigns this NFA by copying another one's private implementation object.
Definition: nfa.cpp:635
std::vector< char32_t > const & getAlphabet() const
Fetches this DFA's set of processable symbols.
Definition: dfa.cpp:351
string initial
Name of the prospective NFA&#39;s initial state.
Definition: nfa.cpp:659
std::valarray< bool > encodeSet(state_set const &qs) const
Translates a set of states from set to bool representation.
Definition: nfa.cpp:397
bool operator!=(nfa const &n) const
Tests whether this NFA doesn't accept the same language as another one.
Definition: nfa.cpp:92
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective NFA's alphabet.
Definition: nfa.cpp:758
state_set getStatesSet() const
Fetches this NFA's set of states as a set of (references to) strings.
Definition: nfa.cpp:425
builder & setAccepting(std::string const &state, bool accept)
Sets whether or not a state will be accepting within the prospective NFA.
Definition: nfa.cpp:774
builder & normalizeStateNames(std::string const &prefix)
Reduces the prospective NFA&#39;s state names to consecutive numbers, prefixed with a given string...
Definition: nfa.cpp:821
size_t index_of(C const &container, T const &element)
Basically Java&#39;s List interface&#39;s indexOf, but as a non-member function and returning the container&#39;s...
Definition: dfa.h:120
Contains the reg::nfa class definition.
valarray< bool > accepting
A true value marks an index as belonging to an accept state.
Definition: nfa.cpp:31
Contains the reg::expression class defintion.
vector< char32_t > alphabet
Represents the set of processable symbols.
Definition: nfa.cpp:32
state_set decodeSet(std::valarray< bool > const &qs) const
Translates a set of states from bool to set representation.
Definition: nfa.cpp:380
bool isAccepting(size_t qi) const
Tests whether a state is an accept state within this NFA.
Definition: nfa.cpp:469
std::u32string getShortestWord() const
Definition: nfa.cpp:244
static nfa::builder unite(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the union of the languages of two NFAs.
Definition: nfa.cpp:576
builder & complement()
Inverts the prospective DFA&#39;s language with respect to all possible strings over its alphabet...
Definition: dfa.cpp:990
Private implementation details of NFAs.
Definition: nfa.cpp:29
std::unordered_set< std::reference_wrapper< std::string const >, hash_reference_string_const, equal_to_reference_string_const > state_set
Nicer name for sets of names of states. Should store references to names in getStates.
Definition: nfa.h:28
unordered_set< char32_t > alphabet
Set of symbols processable by the prospective NFA.
Definition: nfa.cpp:661
Constructs DFAs step by step.
Definition: dfa.h:72
vector< valarray< bool > > epsClosures
Cache for every state&#39;s ε-closures.
Definition: nfa.cpp:34
size_t getNumberOfStates() const
Definition: dfa.cpp:364
builder & makeInitial(std::string const &state)
Resets the initial state for the prospective NFA.
Definition: nfa.cpp:792
vector< string > utf8Alphabet
Represents the set of processable symbols as UTF-8-encoded strings.
Definition: nfa.cpp:33
builder & intersect(nfa const &other)
Makes the prospective NFA accept only words accepted also by another NFA.
Definition: nfa.cpp:923
Private implementation details of NFA builders.
Definition: nfa.cpp:658
std::shared_ptr< dfa const > equivalent
Stores a minimal DFA accepting the same language as this object&#39;s NFA.
Definition: nfa.cpp:30
size_t getNumberOfStates() const
Definition: nfa.cpp:454
static nfa::builder subtract(nfa const &n1, nfa const &n2)
Creates a builder for an NFA accepting the set difference of the languages of two NFAs...
Definition: nfa.cpp:600
pImpl(string &initial, unordered_set< string > &acceptingStates, unordered_set< char32_t > &alphabet, Ntransitionmap &transitions)
Constructs private implementation object with provided members.
Definition: nfa.cpp:670
friend std::u32string findShortestWord(nfa const &n)
Searches the shortest UTF-32-encoded word accepted by a given NFA.
Definition: nfa.cpp:535
std::valarray< bool > getStatesBools() const
Fetches this NFA's set of states encoded as an array of bools.
Definition: nfa.cpp:433
std::valarray< bool > deltaHat(size_t qi, std::u32string const &word) const
Computes this NFA's transition function recursively for a string of symbols, starting in a state spec...
Definition: nfa.cpp:164
pImpl(vector< char32_t > &alphabet, vector< vector< valarray< bool >>> &transitions, vector< string > &labels, valarray< bool > &acceptingStates)
Constructs private implementation object with provided members and empty ε-closure cache...
Definition: nfa.cpp:49
dfa const & getEquivalentDfa(nfa const *owner)
Returns the equivalent DFA safely, constructing it if it wasn't already.
Definition: nfa.cpp:70
bool hasAccepting(std::valarray< bool > const &qs) const
Definition: nfa.cpp:520
Where this library lives.
Definition: dfa.cpp:51
nfa()
Constructs an NFA accepting the empty language ∅.
Definition: nfa.cpp:79
string findShortestUtf8Word(dfa const &d)
Same as above for a UTF-8-encoded word.
Definition: dfa.cpp:433
builder & operator=(builder const &b)
Copy-assigns a builder by copying another one's private implementation object.
Definition: nfa.cpp:736
std::string const & getInitialState() const
Names this NFA's initial state.
Definition: nfa.cpp:409
u32string findShortestWord(dfa const &d)
Searches the shortest UTF-32-encoded word accepted by a given DFA.
Definition: dfa.cpp:406
unordered_set< string > acceptingStates
Set of names of the prospective NFA&#39;s accept states.
Definition: nfa.cpp:660
builder & addTransition(std::string const &from, std::string const &to, char32_t symbol)
Adds a transition for the prospective NFA.
Definition: nfa.cpp:805
std::valarray< bool > const & delta(size_t qi, size_t si) const
Computes this NFA's transition function for a state index and a symbol index.
Definition: nfa.cpp:104
dfa build()
Builds the DFA, as defined by previous operations, including completion.
Definition: dfa.cpp:1035
std::string const & getLabelOf(size_t qi) const
Definition: dfa.cpp:327
std::valarray< bool > const & epsilonClosure(size_t qi) const
Computes a state's ε-closure.
Definition: nfa.cpp:259
std::vector< std::string > const & getStates() const
Fetches this NFA's set of states in order of internal representation.
Definition: nfa.cpp:417
pImpl()=default
Constructs empty private implementation object.
Contains the reg::dfa class definition.
builder & addSymbol(char32_t symbol)
Adds a symbol to the prospective DFA's alphabet.
Definition: dfa.cpp:658
builder()
Constructs a blank builder object.
Definition: nfa.cpp:683
friend std::string findShortestUtf8Word(nfa const &n)
Same as above for a UTF-8-encoded word.
Definition: nfa.cpp:564