1 #include "Teuchos_make_lalr1_parser.hpp" 10 #include "Teuchos_vector.hpp" 11 #include "Teuchos_Graph.hpp" 12 #include "Teuchos_stack.hpp" 13 #include "Teuchos_set.hpp" 29 Config::Config(
int p,
int d):
35 StateConfig::StateConfig(
int s,
int cis):
41 void swap(StateInProgress& a, StateInProgress& b) {
43 swap(a.configs, b.configs);
44 swap(a.actions, b.actions);
48 static Configs make_configs(Grammar
const& g) {
50 for (
int i = 0; i <
size(g.productions); ++i) {
51 const Grammar::Production& production = at(g.productions, i);
52 for (
int j = 0; j <=
size(production.rhs); ++j) {
53 configs.push_back(Config(i,j));
59 static Graph get_left_hand_sides_to_start_configs(
60 Configs
const& cs, Grammar
const& grammar) {
61 Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
62 for (
int c_i = 0; c_i <
size(cs); ++c_i) {
63 const Config& c = at(cs, c_i);
64 if (c.dot > 0)
continue;
65 int p_i = c.production;
66 const Grammar::Production& p = at(grammar.productions, p_i);
67 add_edge(lhs2sc, p.lhs, c_i);
73 typedef StateInProgress
const* Value;
74 bool operator()(Value
const& a, Value
const& b)
const {
75 return a->configs < b->configs;
79 typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
81 static void close(StateInProgress& state,
82 Configs
const& cs, Grammar
const& grammar,
83 Graph
const& lhs2sc) {
84 std::queue<int> config_q;
85 std::set<int> config_set;
86 for (std::vector<int>::const_iterator it = state.configs.begin();
87 it != state.configs.end(); ++it) {
89 config_q.push(config_i);
91 config_set.insert(config_i);
93 while (!config_q.empty()) {
94 int config_i = config_q.front(); config_q.pop();
95 const Config& config = at(cs, config_i);
96 int prod_i = config.production;
97 const Grammar::Production& prod = at(grammar.productions, prod_i);
98 if (config.dot == size(prod.rhs))
continue;
99 int symbol_after_dot = at(prod.rhs, config.dot);
100 if (is_terminal(grammar, symbol_after_dot))
continue;
101 const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
102 for (NodeEdges::const_iterator it = edges.begin();
103 it != edges.end(); ++it) {
105 if (!config_set.count(sc)) {
106 config_set.insert(sc);
111 state.configs.assign(config_set.begin(), config_set.end());
114 static void add_back(StatesInProgress& sips, StateInProgress& sip) {
116 StateInProgressPtr ptr(
new StateInProgress());
121 static void add_reduction_actions(StatesInProgress& states,
122 Configs
const& cs, Grammar
const& grammar) {
123 for (StatesInProgress::iterator it = states.begin();
124 it != states.end(); ++it) {
125 StateInProgressPtr& state_uptr = *it;
126 StateInProgress& state = *state_uptr;
127 for (std::vector<int>::const_iterator it2 = state.configs.begin();
128 it2 != state.configs.end(); ++it2) {
130 const Config& config = at(cs, config_i);
131 int prod_i = config.production;
132 const Grammar::Production& prod = at(grammar.productions, prod_i);
133 if (config.dot != size(prod.rhs))
continue;
134 ActionInProgress reduction;
135 reduction.action.kind = ACTION_REDUCE;
136 reduction.action.production = config.production;
137 state.actions.push_back(reduction);
142 static void set_lr0_contexts(
143 StatesInProgress& states,
144 Grammar
const& grammar) {
145 for (StatesInProgress::iterator it = states.begin();
146 it != states.end(); ++it) {
147 StateInProgressPtr& state_uptr = *it;
148 StateInProgress& state = *state_uptr;
149 for (StateInProgress::Actions::iterator it2 = state.actions.begin();
150 it2 != state.actions.end(); ++it2) {
151 ActionInProgress& action = *it2;
152 if (action.action.kind != ACTION_REDUCE)
continue;
153 if (action.action.production == get_accept_production(grammar)) {
154 action.context.insert(get_end_terminal(grammar));
156 for (
int terminal = 0; terminal < grammar.nterminals; ++terminal) {
157 action.context.insert(terminal);
164 static StatesInProgress make_lr0_parser(Configs
const& cs, Grammar
const& grammar,
165 Graph
const& lhs2sc) {
166 StatesInProgress states;
167 StatePtr2StateIndex state_ptrs2idxs;
168 std::queue<int> state_q;
170 StateInProgress start_state;
171 int accept_nt = get_accept_nonterminal(grammar);
173 int start_accept_config = get_edges(lhs2sc, accept_nt).front();
174 start_state.configs.push_back(start_accept_config);
175 close(start_state, cs, grammar, lhs2sc);
176 int start_state_i = size(states);
177 state_q.push(start_state_i);
178 add_back(states, start_state);
179 state_ptrs2idxs[states.back().get()] = start_state_i;
181 while (!state_q.empty()) {
182 int state_i = state_q.front(); state_q.pop();
183 StateInProgress& state = *at(states, state_i);
184 std::set<int> transition_symbols;
185 for (std::vector<int>::const_iterator it = state.configs.begin();
186 it != state.configs.end(); ++it) {
188 const Config& config = at(cs, config_i);
189 int prod_i = config.production;
190 const Grammar::Production& prod = at(grammar.productions, prod_i);
191 if (config.dot == size(prod.rhs))
continue;
192 int symbol_after_dot = at(prod.rhs, config.dot);
193 transition_symbols.insert(symbol_after_dot);
195 for (std::set<int>::const_iterator it = transition_symbols.begin();
196 it != transition_symbols.end(); ++it) {
197 int transition_symbol = *it;
198 StateInProgress next_state;
199 for (std::vector<int>::const_iterator it2 = state.configs.begin();
200 it2 != state.configs.end(); ++it2) {
202 const Config& config = at(cs, config_i);
203 int prod_i = config.production;
204 const Grammar::Production& prod = at(grammar.productions, prod_i);
205 if (config.dot == size(prod.rhs))
continue;
206 int symbol_after_dot = at(prod.rhs, config.dot);
207 if (symbol_after_dot != transition_symbol)
continue;
209 int next_config_i = config_i + 1;
210 next_state.configs.push_back(next_config_i);
212 close(next_state, cs, grammar, lhs2sc);
213 StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
215 if (it2 == state_ptrs2idxs.end()) {
216 next_state_i = size(states);
217 state_q.push(next_state_i);
218 add_back(states, next_state);
219 state_ptrs2idxs[states.back().get()] = next_state_i;
221 next_state_i = it2->second;
223 ActionInProgress transition;
224 transition.action.kind = ACTION_SHIFT;
225 transition.action.next_state = next_state_i;
226 transition.context.insert(transition_symbol);
227 state.actions.push_back(transition);
230 add_reduction_actions(states, cs, grammar);
231 set_lr0_contexts(states, grammar);
235 static Graph get_productions_by_lhs(Grammar
const& grammar) {
236 int nsymbols = grammar.nsymbols;
237 Graph lhs2prods = make_graph_with_nnodes(nsymbols);
238 for (
int prod_i = 0; prod_i < size(grammar.productions); ++prod_i) {
239 const Grammar::Production& prod = at(grammar.productions, prod_i);
240 add_edge(lhs2prods, prod.lhs, prod_i);
248 static Graph get_symbol_graph(Grammar
const& grammar, Graph
const& lhs2prods) {
249 int nsymbols = grammar.nsymbols;
250 Graph out = make_graph_with_nnodes(nsymbols);
251 for (
int lhs = 0; lhs < nsymbols; ++lhs) {
252 std::set<int> dependees;
253 for (
int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
254 int prod_i = at(lhs2prods, lhs, i);
255 const Grammar::Production& prod = at(grammar.productions, prod_i);
256 for (
int j = 0; j < size(prod.rhs); ++j) {
257 int rhs_symb = at(prod.rhs, j);
258 dependees.insert(rhs_symb);
261 at(out, lhs).assign(dependees.begin(), dependees.end());
273 enum { FIRST_NULL = -425 };
274 typedef std::set<int> FirstSet;
276 static void print_set(std::set<int>
const&
set, Grammar
const& grammar) {
278 for (std::set<int>::const_iterator it =
set.begin(); it !=
set.end(); ++it) {
279 if (it !=
set.begin()) std::cerr <<
", ";
281 if (symb == FIRST_NULL) std::cerr <<
"null";
283 const std::string& symb_name = at(grammar.symbol_names, symb);
284 if (symb_name ==
",") std::cerr <<
"','";
285 else std::cerr << symb_name;
291 static FirstSet get_first_set_of_string(std::vector<int>
const&
string,
292 std::vector<FirstSet>
const& first_sets) {
297 for (i = 0; i < size(
string); ++i) {
298 int symbol = at(
string, i);
299 bool has_null =
false;
300 const FirstSet& first_set = at(first_sets, symbol);
301 for (FirstSet::const_iterator it = first_set.begin();
302 it != first_set.end(); ++it) {
303 int first_symbol = *it;
304 if (first_symbol == FIRST_NULL) has_null =
true;
305 else out.insert(first_symbol);
307 if (!has_null)
break;
309 if (i == size(
string)) out.insert(FIRST_NULL);
316 Event(
int as,
int d):
325 static std::vector<FirstSet> compute_first_sets(Grammar
const& grammar,
327 if (verbose) std::cerr <<
"computing FIRST sets...\n";
328 std::queue<Event> event_q;
329 int nsymbols = grammar.nsymbols;
330 std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
331 Graph lhs2prods = get_productions_by_lhs(grammar);
332 for (
int symbol = 0; symbol < nsymbols; ++symbol) {
333 if (is_terminal(grammar, symbol)) {
334 event_q.push(Event(symbol, symbol));
336 for (
int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
337 int prod_i = at(lhs2prods, symbol, i);
338 const Grammar::Production& prod = at(grammar.productions, prod_i);
339 if (prod.rhs.empty()) {
340 event_q.push(Event(FIRST_NULL, symbol));
346 Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
347 Graph dependees2dependers = make_transpose(dependers2dependees);
348 while (!event_q.empty()) {
349 Event
event = event_q.front(); event_q.pop();
350 int added_symb =
event.added_symbol;
351 int dependee =
event.dependee;
352 FirstSet& dependee_firsts = at(first_sets, dependee);
354 if (dependee_firsts.count(added_symb))
continue;
355 dependee_firsts.insert(added_symb);
356 for (
int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
357 int depender = at(dependees2dependers, dependee, i);
359 const FirstSet& depender_firsts = at(first_sets, depender);
360 for (
int j = 0; j < count_edges(lhs2prods, depender); ++j) {
361 int prod_i = at(lhs2prods, depender, j);
362 const Grammar::Production& prod = at(grammar.productions, prod_i);
363 FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
364 for (FirstSet::iterator it = rhs_first_set.begin();
365 it != rhs_first_set.end(); ++it) {
366 int rhs_first_symb = *it;
367 if (!depender_firsts.count(rhs_first_symb)) {
368 event_q.push(Event(rhs_first_symb, depender));
375 for (
int symb = 0; symb < nsymbols; ++symb) {
376 const std::string& symb_name = at(grammar.symbol_names, symb);
377 std::cerr <<
"FIRST(" << symb_name <<
") = {";
378 const FirstSet& c = at(first_sets, symb);
379 for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
380 if (it != c.begin()) std::cerr <<
", ";
381 int first_symb = *it;
382 if (first_symb == FIRST_NULL) {
385 const std::string& first_name = at(grammar.symbol_names, first_symb);
386 std::cerr << first_name;
396 StateConfigs form_state_configs(StatesInProgress
const& states) {
398 for (
int i = 0; i < size(states); ++i) {
399 StateInProgress& state = *at(states, i);
400 for (
int j = 0; j < size(state.configs); ++j) {
401 out.push_back(StateConfig(i, j));
407 Graph form_states_to_state_configs(StateConfigs
const& scs,
408 StatesInProgress
const& states) {
409 Graph out = make_graph_with_nnodes(size(states));
410 for (
int i = 0; i < size(scs); ++i) {
411 const StateConfig& sc = at(scs, i);
412 at(out, sc.state).push_back(i);
417 static std::string escape_dot(std::string
const& s) {
419 for (std::size_t i = 0; i < s.size(); ++i) {
421 if (c ==
'\\' || c ==
'|' || c ==
'\"' || c ==
'<' || c ==
'>') {
424 }
else if (c ==
'.') {
434 std::string
const& filepath,
435 ParserInProgress
const& pip,
439 const StatesInProgress& sips = pip.states;
440 const Configs& cs = pip.configs;
441 GrammarPtr grammar = pip.grammar;
442 const Graph& states2scs = pip.states2state_configs;
443 os <<
"writing GraphViz file \"" << filepath <<
"\"\n";
444 os <<
"process with:\n";
445 os <<
" dot -Tpdf -o \"" << filepath <<
".pdf\" \"" << filepath <<
"\"\n";
446 std::ofstream file(filepath.c_str());
448 file <<
"digraph {\n";
450 file <<
"rankdir = \"LR\"\n";
452 for (
int s_i = 0; s_i < size(sips); ++s_i) {
453 const StateInProgress& state = *at(sips, s_i);
454 file << s_i <<
" [\n";
455 file <<
"label = \"";
456 file <<
"State " << s_i <<
"\\l";
457 for (
int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
458 int c_i = at(state.configs, cis_i);
459 const Config& config = at(cs, c_i);
460 const Grammar::Production& prod = at(grammar->productions, config.production);
461 int sc_i = at(states2scs, s_i, cis_i);
462 file << sc_i <<
": ";
463 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
464 file << escape_dot(lhs_name) <<
" ::= ";
465 for (
int rhs_i = 0; rhs_i <= size(prod.rhs); ++rhs_i) {
466 if (rhs_i == config.dot) file <<
" .";
467 if (rhs_i < size(prod.rhs)) {
468 int rhs_symb = at(prod.rhs, rhs_i);
469 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
470 file <<
" " << escape_dot(rhs_symb_name);
473 if (config.dot == size(prod.rhs)) {
476 for (
int a_i = 0; a_i < size(state.actions); ++a_i) {
477 const ActionInProgress& action = at(state.actions, a_i);
478 if (action.action.kind == ACTION_REDUCE &&
479 action.action.production == config.production) {
481 const Context& ac = action.context;
482 for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
483 if (it != ac.begin()) file <<
", ";
485 const std::string& symb_name = at(grammar->symbol_names, symb);
486 file << escape_dot(symb_name);
491 "BUG: missing reduce action in state " << s_i <<
" !!!\n");
497 file <<
"shape = \"record\"\n";
499 for (
int a_i = 0; a_i < size(state.actions); ++a_i) {
500 const ActionInProgress& action = at(state.actions, a_i);
501 if (action.action.kind == ACTION_SHIFT) {
502 int symb = *(action.context.begin());
503 const std::string& symb_name = at(grammar->symbol_names, symb);
504 int next = action.action.next_state;
505 file << s_i <<
" -> " << next <<
" [\n";
506 file <<
"label = \"" << escape_dot(symb_name) <<
"\"\n";
514 static Graph make_immediate_predecessor_graph(
515 StateConfigs
const& scs,
516 StatesInProgress
const& states,
517 Graph
const& states2scs,
519 GrammarPtr grammar) {
520 Graph out = make_graph_with_nnodes(size(scs));
521 for (
int s_i = 0; s_i < size(states); ++s_i) {
522 const StateInProgress& state = *at(states, s_i);
523 for (
int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
524 int config_i = at(state.configs, cis_i);
525 const Config& config = at(cs, config_i);
526 const Grammar::Production& prod = at(grammar->productions, config.production);
527 int dot = config.dot;
528 if (dot == size(prod.rhs))
continue;
529 int s = at(prod.rhs, dot);
530 if (is_terminal(*grammar, s))
continue;
531 for (
int cis_j = 0; cis_j < size(state.configs); ++cis_j) {
532 int config_j = at(state.configs, cis_j);
533 const Config& config2 = at(cs, config_j);
534 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
535 if (prod2.lhs == s) {
536 int sc_i = at(states2scs, s_i, cis_i);
537 int sc_j = at(states2scs, s_i, cis_j);
538 add_edge(out, sc_j, sc_i);
546 static Graph find_transition_predecessors(
547 StateConfigs
const& scs,
548 StatesInProgress
const& states,
549 Graph
const& states2scs,
551 GrammarPtr grammar) {
552 Graph out = make_graph_with_nnodes(size(scs));
553 for (
int state_i = 0; state_i < size(states); ++state_i) {
554 const StateInProgress& state = *at(states, state_i);
555 for (
int a_i = 0; a_i < size(state.actions); ++a_i) {
556 const ActionInProgress& action = at(state.actions, a_i);
557 if (action.action.kind != ACTION_SHIFT)
continue;
559 int symbol = *(action.context.begin());
560 int state_j = action.action.next_state;
561 const StateInProgress& state2 = *at(states, state_j);
562 for (
int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
563 int config_i = at(state.configs, cis_i);
564 const Config& config = at(cs, config_i);
565 for (
int cis_j = 0; cis_j < size(state2.configs); ++cis_j) {
566 int config_j = at(state2.configs, cis_j);
567 const Config& config2 = at(cs, config_j);
568 if (config.production == config2.production &&
569 config.dot + 1 == config2.dot) {
570 const Grammar::Production& prod = at(grammar->productions, config.production);
571 int rhs_symbol = at(prod.rhs, config.dot);
572 if (rhs_symbol == symbol) {
573 int sc_i = at(states2scs, state_i, cis_i);
574 int sc_j = at(states2scs, state_j, cis_j);
575 add_edge(out, sc_j, sc_i);
585 static Graph make_originator_graph(
586 StateConfigs
const& scs,
587 StatesInProgress
const& states,
588 Graph
const& states2scs,
590 GrammarPtr grammar) {
591 Graph out = make_graph_with_nnodes(size(scs));
592 Graph ipg = make_immediate_predecessor_graph(
593 scs, states, states2scs, cs, grammar);
594 Graph tpg = find_transition_predecessors(
595 scs, states, states2scs, cs, grammar);
596 for (
int sc_i = 0; sc_i < size(scs); ++sc_i) {
597 std::set<int> originators;
605 while (!tpq.empty()) {
606 int tpp = tpq.front(); tpq.pop();
607 for (
int i = 0; i < count_edges(tpg, tpp); ++i) {
608 int tpc = at(tpg, tpp, i);
609 if (tps.count(tpc))
continue;
613 for (
int i = 0; i < count_edges(ipg, tpp); ++i) {
614 int ip_i = at(ipg, tpp, i);
615 originators.insert(ip_i);
618 at(out, sc_i).assign(originators.begin(), originators.end());
623 static std::vector<int> get_follow_string(
625 StateConfigs
const& scs,
626 StatesInProgress
const& states,
628 GrammarPtr grammar) {
629 const StateConfig& sc = at(scs, sc_addr);
630 const StateInProgress& state = *at(states, sc.state);
631 int config_i = at(state.configs, sc.config_in_state);
632 const Config& config = at(cs, config_i);
633 const Grammar::Production& prod = at(grammar->productions, config.production);
634 int out_size = size(prod.rhs) - (config.dot + 1);
635 std::vector<int> out;
637 if (out_size < 1)
return out;
638 reserve(out, out_size);
639 for (
int i = config.dot + 1; i < size(prod.rhs); ++i) {
640 out.push_back(at(prod.rhs, i));
645 static void print_string(std::vector<int>
const& str, GrammarPtr grammar) {
647 for (
int i = 0; i < size(str); ++i) {
648 int symb = at(str, i);
649 const std::string& symb_name = at(grammar->symbol_names, symb);
650 std::cerr << symb_name;
655 static bool has_non_null_terminal_descendant(FirstSet
const& first_set) {
656 if (first_set.empty())
return false;
657 if (first_set.size() > 1)
return true;
658 return *(first_set.begin()) != FIRST_NULL;
661 static Context get_contexts(FirstSet first_set) {
662 FirstSet::iterator it = first_set.find(FIRST_NULL);
663 if (it != first_set.end()) first_set.erase(it);
667 enum { MARKER = -433 };
668 enum { ZERO = -100 };
670 static void print_stack(std::vector<int>
const& stack) {
671 for (
int i = 0; i < size(stack); ++i) {
672 int symb = at(stack, i);
673 if (symb == MARKER) std::cerr <<
" M";
674 else if (symb == ZERO) std::cerr <<
" Z";
675 else std::cerr <<
" " << symb;
680 static void move_markers(
681 std::vector<int>& lane,
682 std::vector<int>& in_lane,
687 int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
690 for (
int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
691 if (at(lane, i) == MARKER) {
698 top_addr = lane.back();
699 lane.resize(lane.size() - 1);
701 for (
int i = 0; i < r; ++i) lane.push_back(MARKER);
703 lane.push_back(top_addr);
704 at(in_lane, top_addr) = size(lane) - 1;
708 typedef std::vector<Context> Contexts;
710 static void context_adding_routine(
711 std::vector<int>
const& lane,
713 Context& contexts_generated,
719 std::cerr <<
" CONTEXT ADDING ROUTINE\n";
720 std::cerr <<
" LANE:";
722 std::cerr <<
" $\\zeta$-POINTER = " << zeta_pointer <<
'\n';
724 for (
int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
725 int v_r = at(lane, r);
726 if (verbose) std::cerr <<
" r = " << r <<
", $v_r$ = ";
729 if (v_r == MARKER) std::cerr <<
"marker\n";
730 else if (v_r == ZERO) std::cerr <<
"zero\n";
734 int tau_r_addr = v_r;
736 std::cerr <<
"$\\tau_r$ = " << tau_r_addr <<
'\n';
737 std::cerr <<
" CONTEXTS_GENERATED = ";
738 print_set(contexts_generated, *grammar);
739 std::cerr <<
"\n CONTEXTS_$\\tau_r$ = ";
740 print_set(at(contexts, tau_r_addr), *grammar);
741 std::cerr <<
"\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
743 subtract_from(contexts_generated, at(contexts, tau_r_addr));
745 std::cerr <<
"\n CONTEXTS_GENERATED = ";
746 print_set(contexts_generated, *grammar);
747 std::cerr <<
"\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
749 unite_with(at(contexts, tau_r_addr), contexts_generated);
751 std::cerr <<
"\n CONTEXTS_$\\tau_r$ = ";
752 print_set(at(contexts, tau_r_addr), *grammar);
758 static void deal_with_tests_failed(
759 int& num_originators_failed,
760 int& first_originator_failed,
763 std::vector<int>& lane,
764 std::vector<int>& in_lane,
766 std::vector<int>& stack,
769 if (verbose) std::cerr <<
" Dealing with test failures\n";
770 if (num_originators_failed == 0) {
771 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the first originator of " << zeta_addr <<
" to fail the tests\n";
772 first_originator_failed = zeta_prime_addr;
773 if (verbose) std::cerr <<
" pushing " << zeta_prime_addr <<
" onto LANE:\n ";
774 lane.push_back(zeta_prime_addr);
775 if (verbose) print_stack(lane);
776 at(in_lane, zeta_prime_addr) = size(lane) - 1;
777 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") <- ON\n";
779 if (verbose) std::cerr <<
" TESTS_FAILED <- ON\n";
780 }
else if (num_originators_failed == 1) {
781 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the second originator of " << zeta_addr <<
" to fail the tests\n";
782 int zeta_double_prime_addr = first_originator_failed;
783 if (verbose) std::cerr <<
" the first was " << zeta_double_prime_addr <<
'\n';
784 TEUCHOS_ASSERT(at(lane, size(lane) - 1) == zeta_double_prime_addr);
786 if (verbose) std::cerr <<
" pop LANE, push {marker, " << zeta_double_prime_addr <<
"} onto it:\n ";
787 lane.resize(lane.size() - 1);
788 lane.push_back(MARKER);
789 lane.push_back(zeta_double_prime_addr);
790 if (verbose) print_stack(lane);
791 if (verbose) std::cerr <<
" push {marker, " << zeta_prime_addr <<
"} onto STACK:\n ";
792 stack.push_back(MARKER);
793 stack.push_back(zeta_prime_addr);
794 if (verbose) print_stack(stack);
796 if (verbose) std::cerr <<
" " << zeta_prime_addr <<
" is the third or later originator of " << zeta_addr <<
" to fail the tests\n";
797 if (verbose) std::cerr <<
" pushing " << zeta_prime_addr <<
" onto STACK:\n ";
798 stack.push_back(zeta_prime_addr);
799 if (verbose) print_stack(stack);
801 ++num_originators_failed;
804 static void heuristic_propagation_of_context_sets(
807 std::vector<bool>& complete,
808 StateConfigs
const& scs,
809 StatesInProgress
const& states,
810 Graph
const& states2scs,
814 const StateConfig& tau = at(scs, tau_addr);
815 const StateInProgress& state = *at(states, tau.state);
816 int config_i = at(state.configs, tau.config_in_state);
817 const Config& config = at(cs, config_i);
818 if (config.dot != 0)
return;
819 const Grammar::Production& prod = at(grammar->productions, config.production);
820 for (
int cis_j = 0; cis_j < size(state.configs); ++cis_j) {
821 int config_j = at(state.configs, cis_j);
822 if (config_j == config_i)
continue;
823 const Config& config2 = at(cs, config_j);
824 if (config2.dot != 0)
continue;
825 const Grammar::Production& prod2 = at(grammar->productions, config2.production);
826 if (prod.lhs != prod2.lhs)
continue;
827 int tau_prime_addr = at(states2scs, tau.state, cis_j);
828 at(contexts, tau_prime_addr) = at(contexts, tau_addr);
829 at(complete, tau_prime_addr) =
true;
835 static void compute_context_set(
838 std::vector<bool>& complete,
839 StateConfigs
const& scs,
840 Graph
const& originator_graph,
841 StatesInProgress
const& states,
842 Graph
const& states2scs,
844 std::vector<FirstSet>
const& first_sets,
848 if (verbose) std::cerr <<
"Computing context set for $\\zeta_j$ = " << zeta_j_addr <<
"...\n";
849 if (verbose) std::cerr <<
"BEGIN PROGRAM\n";
850 if (at(complete, zeta_j_addr)) {
851 if (verbose) std::cerr << zeta_j_addr <<
" was already complete!\nEND PROGRAM\n\n";
854 std::vector<int> stack;
856 std::vector<int> lane;
857 std::vector<int> in_lane = make_vector<int>(size(scs), -1);
858 lane.push_back(zeta_j_addr);
859 at(in_lane, zeta_j_addr) = size(lane) - 1;
860 bool tests_failed =
false;
861 Context contexts_generated;
863 std::cerr <<
"Initial LANE:";
868 int zeta_addr = lane.back();
870 std::cerr <<
"Top of LANE is $\\zeta$ = " << zeta_addr <<
'\n';
872 int zeta_pointer = size(lane) - 1;
873 if (verbose) std::cerr <<
"$\\zeta$-POINTER <- " << zeta_pointer <<
'\n';
874 int num_originators_failed = 0;
875 int first_originator_failed = -1;
876 if (verbose) std::cerr <<
"DO_LOOP:\n";
878 for (
int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
879 int zeta_prime_addr = at(originator_graph, zeta_addr, i);
881 std::cerr <<
"Next originator of $\\zeta$ = " << zeta_addr <<
" is $\\zeta'$ = " << zeta_prime_addr <<
'\n';
883 std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
885 std::cerr <<
" FOLLOW string of $\\zeta'$ = " << zeta_prime_addr <<
" is ";
886 print_string(gamma, grammar);
889 FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
891 std::cerr <<
" FIRST set of ";
892 print_string(gamma, grammar);
894 print_set(gamma_first, *grammar);
897 if (has_non_null_terminal_descendant(gamma_first)) {
900 print_string(gamma, grammar);
901 std::cerr <<
" has a non-null terminal descendant\n";
903 contexts_generated = get_contexts(gamma_first);
905 std::cerr <<
" CONTEXTS_GENERATED = ";
906 print_set(contexts_generated, *grammar);
907 std::cerr <<
" = 1-heads of non-null descendants of ";
908 print_string(gamma, grammar);
911 if (gamma_first.count(FIRST_NULL)) {
914 print_string(gamma, grammar);
915 std::cerr <<
" has a null terminal descendant\n";
917 if (at(complete, zeta_prime_addr)) {
918 unite_with(contexts_generated, at(contexts, zeta_prime_addr));
919 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
921 }
else if (-1 == at(in_lane, zeta_prime_addr)) {
922 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
925 deal_with_tests_failed(num_originators_failed, first_originator_failed,
926 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
929 throw ParserBuildFail(
"error: grammar is ambiguous.\n");
932 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
938 print_string(gamma, grammar);
939 std::cerr <<
" does not have a non-null terminal descendant\n";
941 if (at(complete, zeta_prime_addr)) {
942 if (verbose) std::cerr <<
" COMPLETE(" << zeta_prime_addr <<
") is ON\n";
943 contexts_generated = at(contexts, zeta_prime_addr);
944 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
947 if (verbose) std::cerr <<
" COMPLETE(" << zeta_prime_addr <<
") is OFF\n";
948 if (-1 != at(in_lane, zeta_prime_addr)) {
949 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") is ON\n";
950 move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
951 contexts_generated = at(contexts, zeta_prime_addr);
952 context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
955 if (verbose) std::cerr <<
" IN_LANE(" << zeta_prime_addr <<
") is OFF\n";
956 deal_with_tests_failed(num_originators_failed, first_originator_failed,
957 zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
963 if (verbose) std::cerr <<
"END DO_LOOP\n";
966 std::cerr <<
" TESTS_FAILED was on, turning it off and going to next configuration\n";
968 tests_failed =
false;
971 bool keep_lane_popping =
true;
972 if (verbose) std::cerr <<
" Start LANE popping\n";
973 while (keep_lane_popping) {
976 std::cerr <<
" LANE:";
979 if (at(lane, size(lane) - 1) == MARKER) {
980 if (verbose) std::cerr <<
" Top of LANE is a marker\n";
981 if (verbose) std::cerr <<
" Start STACK popping\n";
985 std::cerr <<
" STACK:";
987 std::cerr <<
" LANE:";
990 if (stack.back() == MARKER) {
991 if (verbose) std::cerr <<
" Top of STACK is a marker, pop STACK and LANE\n";
992 resize(stack, size(stack) - 1);
993 resize(lane, size(lane) - 1);
995 }
else if (at(complete, stack.back())) {
996 if (verbose) std::cerr <<
" Top of STACK is has COMPLETE flag, pop STACK\n";
997 resize(stack, size(stack) - 1);
1000 int addr = stack.back();
1001 if (verbose) std::cerr <<
" Top of STACK is " << addr <<
", pop STACK\n";
1002 resize(stack, size(stack) - 1);
1003 if (verbose) std::cerr <<
" Push " << addr <<
" onto LANE\n";
1004 lane.push_back(addr);
1005 if (verbose) std::cerr <<
" IN_LANE(" << addr <<
") <- ON\n";
1006 at(in_lane, addr) = size(lane) - 1;
1007 keep_lane_popping =
false;
1011 }
else if (at(lane, size(lane) - 1) == ZERO) {
1012 if (verbose) std::cerr <<
" Top of LANE is a zero\n";
1013 if (verbose) std::cerr <<
" Pop LANE\n";
1014 resize(lane, size(lane) - 1);
1017 int tau_addr = lane.back();
1018 if (verbose) std::cerr <<
" Top of LANE is $\\tau$ = " << tau_addr <<
"\n";
1019 at(in_lane, tau_addr) = -1;
1020 if (verbose) std::cerr <<
" IN_LANE(" << tau_addr <<
") <- OFF\n";
1021 at(complete, tau_addr) =
true;
1022 if (verbose) std::cerr <<
" COMPLETE(" << tau_addr <<
") <- ON\n";
1023 if (verbose) std::cerr <<
" HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1024 heuristic_propagation_of_context_sets(
1025 tau_addr, contexts, complete,
1026 scs, states, states2scs, cs, grammar);
1027 if (size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1028 if (verbose) std::cerr <<
"END PROGRAM\n\n";
1031 if (verbose) std::cerr <<
" Pop LANE\n";
1032 resize(lane, size(lane) - 1);
1039 static std::vector<bool> determine_adequate_states(
1040 StatesInProgress
const& states,
1044 std::vector<bool> out = make_vector<bool>(size(states));
1045 for (
int s_i = 0; s_i < size(states); ++s_i) {
1046 const StateInProgress& state = *at(states, s_i);
1047 bool state_is_adequate =
true;
1048 for (
int a_i = 0; a_i < size(state.actions); ++a_i) {
1049 const ActionInProgress& action = at(state.actions, a_i);
1050 if (action.action.kind == ACTION_SHIFT &&
1051 is_nonterminal(*grammar, *(action.context.begin()))) {
1054 for (
int a_j = a_i + 1; a_j < size(state.actions); ++a_j) {
1055 const ActionInProgress& action2 = at(state.actions, a_j);
1056 if (action2.action.kind == ACTION_SHIFT &&
1057 is_nonterminal(*grammar, *(action2.context.begin()))) {
1060 if (intersects(action2.context, action.context)) {
1062 const ActionInProgress* ap1 = &action;
1063 const ActionInProgress* ap2 = &action2;
1064 if (ap1->action.kind == ACTION_SHIFT) {
1065 std::swap(ap1, ap2);
1068 os <<
"shift-reduce conflict in state " << s_i <<
":\n";
1070 const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1071 const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1072 os << lhs_name <<
" ::=";
1073 for (
int rhs_i = 0; rhs_i < size(prod.rhs); ++rhs_i) {
1074 int rhs_symb = at(prod.rhs, rhs_i);
1075 const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1076 os <<
" " << rhs_symb_name;
1078 int shift_symb = *(ap2->context.begin());
1079 const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1080 os <<
"\nshift " << shift_name <<
'\n';
1082 state_is_adequate =
false;
1086 if (!state_is_adequate)
break;
1088 at(out, s_i) = state_is_adequate;
1090 if (verbose) os <<
'\n';
1094 ParserInProgress draft_lalr1_parser(GrammarPtr grammar,
bool verbose) {
1095 ParserInProgress out;
1096 Configs& cs = out.configs;
1097 StatesInProgress& states = out.states;
1098 StateConfigs& scs = out.state_configs;
1099 Graph& states2scs = out.states2state_configs;
1100 out.grammar = grammar;
1101 cs = make_configs(*grammar);
1102 Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1103 if (verbose) std::cerr <<
"Building LR(0) parser\n";
1104 states = make_lr0_parser(cs, *grammar, lhs2cs);
1105 scs = form_state_configs(states);
1106 states2scs = form_states_to_state_configs(scs, states);
1107 if (verbose) print_graphviz(
"lr0.dot", out,
true, std::cerr);
1108 if (verbose) std::cerr <<
"Checking adequacy of LR(0) machine\n";
1109 std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1111 if (*(std::min_element(adequate.begin(), adequate.end()))) {
1112 if (verbose) std::cerr <<
"The grammar is LR(0)!\n";
1115 std::vector<bool> complete = make_vector<bool>(size(scs),
false);
1116 std::vector<Context> contexts = make_vector<Context>(size(scs));
1117 int accept_prod_i = get_accept_production(*grammar);
1120 for (
int sc_i = 0; sc_i < size(scs); ++sc_i) {
1121 StateConfig& sc = at(scs, sc_i);
1122 StateInProgress& state = *at(states, sc.state);
1123 int config_i = at(state.configs, sc.config_in_state);
1124 Config& config = at(cs, config_i);
1125 if (config.production == accept_prod_i) {
1126 at(complete, sc_i) =
true;
1127 at(contexts, sc_i).insert(get_end_terminal(*grammar));
1130 Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1131 if (verbose) std::cerr <<
"Originator Graph:\n";
1132 if (verbose) std::cerr << og <<
'\n';
1133 std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1136 for (
int s_i = 0; s_i < size(states); ++s_i) {
1137 if (at(adequate, s_i))
continue;
1138 StateInProgress& state = *at(states, s_i);
1139 for (
int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
1140 int config_i = at(state.configs, cis_i);
1141 const Config& config = at(cs, config_i);
1142 const Grammar::Production& prod = at(grammar->productions, config.production);
1143 if (config.dot != size(prod.rhs))
continue;
1144 int zeta_j_addr = at(states2scs, s_i, cis_i);
1145 compute_context_set(zeta_j_addr, contexts, complete, scs,
1146 og, states, states2scs, cs, first_sets, grammar, verbose);
1151 for (
int s_i = 0; s_i < size(states); ++s_i) {
1152 StateInProgress& state = *at(states, s_i);
1153 for (
int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
1154 int sc_i = at(states2scs, s_i, cis_i);
1155 if (!at(complete, sc_i))
continue;
1156 int config_i = at(state.configs, cis_i);
1157 Config& config = at(cs, config_i);
1158 const Grammar::Production& prod = at(grammar->productions, config.production);
1159 if (config.dot != size(prod.rhs))
continue;
1160 for (
int a_i = 0; a_i < size(state.actions); ++a_i) {
1161 ActionInProgress& action = at(state.actions, a_i);
1162 if (action.action.kind == ACTION_REDUCE &&
1163 action.action.production == config.production) {
1164 action.context = at(contexts, sc_i);
1169 if (verbose) std::cerr <<
"Checking adequacy of LALR(1) machine\n";
1170 adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1171 if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1172 std::stringstream ss;
1173 ss <<
"error: The grammar is not LALR(1).\n";
1174 determine_adequate_states(states, grammar,
true, ss);
1175 print_graphviz(
"error.dot", out,
true, ss);
1176 std::string s = ss.str();
1177 throw ParserBuildFail(s);
1179 if (verbose) std::cerr <<
"The grammar is LALR(1)!\n";
1180 if (verbose) print_graphviz(
"lalr1.dot", out,
true, std::cerr);
1184 Parser accept_parser(ParserInProgress
const& pip) {
1185 const StatesInProgress& sips = pip.states;
1186 GrammarPtr grammar = pip.grammar;
1187 Parser out(grammar, size(sips));
1188 for (
int s_i = 0; s_i < size(sips); ++s_i) {
1191 for (
int s_i = 0; s_i < size(sips); ++s_i) {
1192 const StateInProgress& sip = *at(sips, s_i);
1193 for (
int a_i = 0; a_i < size(sip.actions); ++a_i) {
1194 const ActionInProgress& action = at(sip.actions, a_i);
1195 if (action.action.kind == ACTION_SHIFT &&
1196 is_nonterminal(*grammar, *(action.context.begin()))) {
1197 int nt = as_nonterminal(*grammar, *(action.context.begin()));
1198 add_nonterminal_action(out, s_i, nt, action.action.next_state);
1200 for (Context::const_iterator it = action.context.begin();
1201 it != action.context.end(); ++it) {
1204 add_terminal_action(out, s_i, terminal, action.action);
1212 ParserBuildFail::ParserBuildFail(
const std::string& msg):
1213 std::invalid_argument(msg) {
1217 ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1218 return accept_parser(pip);
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
int size(const Comm< Ordinal > &comm)
Get the number of processes in the communicator.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
void swap(Teuchos::any &a, Teuchos::any &b)
Special swap for other code to find via Argument Dependent Lookup.