Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_make_lalr1_parser.cpp
1 #include "Teuchos_make_lalr1_parser.hpp"
2 
3 #include <map>
4 #include <iostream>
5 #include <cstdlib>
6 #include <queue>
7 #include <algorithm>
8 #include <fstream>
9 
10 #include "Teuchos_vector.hpp"
11 #include "Teuchos_Graph.hpp"
12 #include "Teuchos_stack.hpp"
13 #include "Teuchos_set.hpp"
14 
15 namespace Teuchos {
16 
17 /* The LALR(1) parser construction implemented here is based on
18  David Pager's work:
19 
20  Pager, David.
21  "The lane-tracing algorithm for constructing LR (k) parsers
22  and ways of enhancing its efficiency."
23  Information Sciences 12.1 (1977): 19-42.
24 
25  The identifiers used in this code are consistent with the terminology
26  in that paper, except where we bring in FIRST set terminology, which
27  Pager doesn't go into detail about. */
28 
29 Config::Config(int p, int d):
30  production(p),
31  dot(d)
32 {
33 }
34 
35 StateConfig::StateConfig(int s, int cis):
36  state(s),
37  config_in_state(cis)
38 {
39 }
40 
41 void swap(StateInProgress& a, StateInProgress& b) {
42  using std::swap;
43  swap(a.configs, b.configs);
44  swap(a.actions, b.actions);
45 }
46 
47 // expand the grammar productions into marked productions
48 static Configs make_configs(Grammar const& g) {
49  Configs configs;
50  for (int i = 0; i < size(g.productions); ++i) {
51  const Grammar::Production& production = at(g.productions, i);
52  for (int j = 0; j <= size(production.rhs); ++j) {
53  configs.push_back(Config(i,j));
54  }
55  }
56  return configs;
57 }
58 
59 static Graph get_left_hand_sides_to_start_configs(
60  Configs const& cs, Grammar const& grammar) {
61  Graph lhs2sc = make_graph_with_nnodes(grammar.nsymbols);
62  for (int c_i = 0; c_i < size(cs); ++c_i) {
63  const Config& c = at(cs, c_i);
64  if (c.dot > 0) continue;
65  int p_i = c.production;
66  const Grammar::Production& p = at(grammar.productions, p_i);
67  add_edge(lhs2sc, p.lhs, c_i);
68  }
69  return lhs2sc;
70 }
71 
72 struct StateCompare {
73  typedef StateInProgress const* Value;
74  bool operator()(Value const& a, Value const& b) const {
75  return a->configs < b->configs;
76  }
77 };
78 
79 typedef std::map<StateInProgress const*, int, StateCompare> StatePtr2StateIndex;
80 
81 static void close(StateInProgress& state,
82  Configs const& cs, Grammar const& grammar,
83  Graph const& lhs2sc) {
84  std::queue<int> config_q;
85  std::set<int> config_set;
86  for (std::vector<int>::const_iterator it = state.configs.begin();
87  it != state.configs.end(); ++it) {
88  int config_i = *it;
89  config_q.push(config_i);
90  TEUCHOS_ASSERT(!config_set.count(config_i));
91  config_set.insert(config_i);
92  }
93  while (!config_q.empty()) {
94  int config_i = config_q.front(); config_q.pop();
95  const Config& config = at(cs, config_i);
96  int prod_i = config.production;
97  const Grammar::Production& prod = at(grammar.productions, prod_i);
98  if (config.dot == size(prod.rhs)) continue;
99  int symbol_after_dot = at(prod.rhs, config.dot);
100  if (is_terminal(grammar, symbol_after_dot)) continue;
101  const NodeEdges& edges = get_edges(lhs2sc, symbol_after_dot);
102  for (NodeEdges::const_iterator it = edges.begin();
103  it != edges.end(); ++it) {
104  int sc = *it;
105  if (!config_set.count(sc)) {
106  config_set.insert(sc);
107  config_q.push(sc);
108  }
109  }
110  }
111  state.configs.assign(config_set.begin(), config_set.end());
112 }
113 
114 static void add_back(StatesInProgress& sips, StateInProgress& sip) {
115  using std::swap;
116  StateInProgressPtr ptr(new StateInProgress());
117  swap(*ptr, sip);
118  sips.push_back(ptr);
119 }
120 
121 static void add_reduction_actions(StatesInProgress& states,
122  Configs const& cs, Grammar const& grammar) {
123  for (StatesInProgress::iterator it = states.begin();
124  it != states.end(); ++it) {
125  StateInProgressPtr& state_uptr = *it;
126  StateInProgress& state = *state_uptr;
127  for (std::vector<int>::const_iterator it2 = state.configs.begin();
128  it2 != state.configs.end(); ++it2) {
129  int config_i = *it2;
130  const Config& config = at(cs, config_i);
131  int prod_i = config.production;
132  const Grammar::Production& prod = at(grammar.productions, prod_i);
133  if (config.dot != size(prod.rhs)) continue;
134  ActionInProgress reduction;
135  reduction.action.kind = ACTION_REDUCE;
136  reduction.action.production = config.production;
137  state.actions.push_back(reduction);
138  }
139  }
140 }
141 
142 static void set_lr0_contexts(
143  StatesInProgress& states,
144  Grammar const& grammar) {
145  for (StatesInProgress::iterator it = states.begin();
146  it != states.end(); ++it) {
147  StateInProgressPtr& state_uptr = *it;
148  StateInProgress& state = *state_uptr;
149  for (StateInProgress::Actions::iterator it2 = state.actions.begin();
150  it2 != state.actions.end(); ++it2) {
151  ActionInProgress& action = *it2;
152  if (action.action.kind != ACTION_REDUCE) continue;
153  if (action.action.production == get_accept_production(grammar)) {
154  action.context.insert(get_end_terminal(grammar));
155  } else {
156  for (int terminal = 0; terminal < grammar.nterminals; ++terminal) {
157  action.context.insert(terminal);
158  }
159  }
160  }
161  }
162 }
163 
164 static StatesInProgress make_lr0_parser(Configs const& cs, Grammar const& grammar,
165  Graph const& lhs2sc) {
166  StatesInProgress states;
167  StatePtr2StateIndex state_ptrs2idxs;
168  std::queue<int> state_q;
169  { /* start state */
170  StateInProgress start_state;
171  int accept_nt = get_accept_nonterminal(grammar);
172  /* there should only be one start configuration for the accept symbol */
173  int start_accept_config = get_edges(lhs2sc, accept_nt).front();
174  start_state.configs.push_back(start_accept_config);
175  close(start_state, cs, grammar, lhs2sc);
176  int start_state_i = size(states);
177  state_q.push(start_state_i);
178  add_back(states, start_state);
179  state_ptrs2idxs[states.back().get()] = start_state_i;
180  }
181  while (!state_q.empty()) {
182  int state_i = state_q.front(); state_q.pop();
183  StateInProgress& state = *at(states, state_i);
184  std::set<int> transition_symbols;
185  for (std::vector<int>::const_iterator it = state.configs.begin();
186  it != state.configs.end(); ++it) {
187  int config_i = *it;
188  const Config& config = at(cs, config_i);
189  int prod_i = config.production;
190  const Grammar::Production& prod = at(grammar.productions, prod_i);
191  if (config.dot == size(prod.rhs)) continue;
192  int symbol_after_dot = at(prod.rhs, config.dot);
193  transition_symbols.insert(symbol_after_dot);
194  }
195  for (std::set<int>::const_iterator it = transition_symbols.begin();
196  it != transition_symbols.end(); ++it) {
197  int transition_symbol = *it;
198  StateInProgress next_state;
199  for (std::vector<int>::const_iterator it2 = state.configs.begin();
200  it2 != state.configs.end(); ++it2) {
201  int config_i = *it2;
202  const Config& config = at(cs, config_i);
203  int prod_i = config.production;
204  const Grammar::Production& prod = at(grammar.productions, prod_i);
205  if (config.dot == size(prod.rhs)) continue;
206  int symbol_after_dot = at(prod.rhs, config.dot);
207  if (symbol_after_dot != transition_symbol) continue;
208  /* transition successor should just be the next index */
209  int next_config_i = config_i + 1;
210  next_state.configs.push_back(next_config_i);
211  }
212  close(next_state, cs, grammar, lhs2sc);
213  StatePtr2StateIndex::iterator it2 = state_ptrs2idxs.find(&next_state);
214  int next_state_i;
215  if (it2 == state_ptrs2idxs.end()) {
216  next_state_i = size(states);
217  state_q.push(next_state_i);
218  add_back(states, next_state);
219  state_ptrs2idxs[states.back().get()] = next_state_i;
220  } else {
221  next_state_i = it2->second;
222  }
223  ActionInProgress transition;
224  transition.action.kind = ACTION_SHIFT;
225  transition.action.next_state = next_state_i;
226  transition.context.insert(transition_symbol);
227  state.actions.push_back(transition);
228  }
229  }
230  add_reduction_actions(states, cs, grammar);
231  set_lr0_contexts(states, grammar);
232  return states;
233 }
234 
235 static Graph get_productions_by_lhs(Grammar const& grammar) {
236  int nsymbols = grammar.nsymbols;
237  Graph lhs2prods = make_graph_with_nnodes(nsymbols);
238  for (int prod_i = 0; prod_i < size(grammar.productions); ++prod_i) {
239  const Grammar::Production& prod = at(grammar.productions, prod_i);
240  add_edge(lhs2prods, prod.lhs, prod_i);
241  }
242  return lhs2prods;
243 }
244 
245 /* compute a graph where symbols are graph nodes, and
246  there exists an edge (A, B) if B appears in the RHS of
247  any production in which A is the LHS */
248 static Graph get_symbol_graph(Grammar const& grammar, Graph const& lhs2prods) {
249  int nsymbols = grammar.nsymbols;
250  Graph out = make_graph_with_nnodes(nsymbols);
251  for (int lhs = 0; lhs < nsymbols; ++lhs) {
252  std::set<int> dependees;
253  for (int i = 0; i < count_edges(lhs2prods, lhs); ++i) {
254  int prod_i = at(lhs2prods, lhs, i);
255  const Grammar::Production& prod = at(grammar.productions, prod_i);
256  for (int j = 0; j < size(prod.rhs); ++j) {
257  int rhs_symb = at(prod.rhs, j);
258  dependees.insert(rhs_symb);
259  }
260  }
261  at(out, lhs).assign(dependees.begin(), dependees.end());
262  }
263  return out;
264 }
265 
266 /* the "FIRST" set, i.e. the set of 1-heads of non-null terminal descendants of
267  some string.
268  As suggested by Westley Weimer here:
269  https://www.cs.virginia.edu/~weimer/2008-415/reading/FirstFollowLL.pdf
270  we will also use the FIRST set for determining whether the string has
271  a null terminal descendant, indicated by the prescence of a special
272  FIRST_NULL symbol in the FIRST set */
273 enum { FIRST_NULL = -425 };
274 typedef std::set<int> FirstSet;
275 
276 static void print_set(std::set<int> const& set, Grammar const& grammar) {
277  std::cerr << "{";
278  for (std::set<int>::const_iterator it = set.begin(); it != set.end(); ++it) {
279  if (it != set.begin()) std::cerr << ", ";
280  int symb = *it;
281  if (symb == FIRST_NULL) std::cerr << "null";
282  else {
283  const std::string& symb_name = at(grammar.symbol_names, symb);
284  if (symb_name == ",") std::cerr << "','";
285  else std::cerr << symb_name;
286  }
287  }
288  std::cerr << "}";
289 }
290 
291 static FirstSet get_first_set_of_string(std::vector<int> const& string,
292  std::vector<FirstSet> const& first_sets) {
293  FirstSet out;
294  /* walk the string, stop when any symbol is found that doesn't
295  have a null terminal descendant */
296  int i;
297  for (i = 0; i < size(string); ++i) {
298  int symbol = at(string, i);
299  bool has_null = false;
300  const FirstSet& first_set = at(first_sets, symbol);
301  for (FirstSet::const_iterator it = first_set.begin();
302  it != first_set.end(); ++it) {
303  int first_symbol = *it;
304  if (first_symbol == FIRST_NULL) has_null = true;
305  else out.insert(first_symbol);
306  }
307  if (!has_null) break;
308  }
309  if (i == size(string)) out.insert(FIRST_NULL);
310  return out;
311 }
312 
313 struct Event {
314  int added_symbol;
315  int dependee;
316  Event(int as, int d):
317  added_symbol(as),
318  dependee(d)
319  {}
320 };
321 
322 /* figure out the FIRST sets for each non-terminal in the grammar.
323  I couldn't find a super-efficient way to do this, so here is a
324  free-for-all event-driven implementation. */
325 static std::vector<FirstSet> compute_first_sets(Grammar const& grammar,
326  bool verbose) {
327  if (verbose) std::cerr << "computing FIRST sets...\n";
328  std::queue<Event> event_q;
329  int nsymbols = grammar.nsymbols;
330  std::vector<FirstSet> first_sets = make_vector<FirstSet>(nsymbols);
331  Graph lhs2prods = get_productions_by_lhs(grammar);
332  for (int symbol = 0; symbol < nsymbols; ++symbol) {
333  if (is_terminal(grammar, symbol)) {
334  event_q.push(Event(symbol, symbol));
335  } else {
336  for (int i = 0; i < count_edges(lhs2prods, symbol); ++i) {
337  int prod_i = at(lhs2prods, symbol, i);
338  const Grammar::Production& prod = at(grammar.productions, prod_i);
339  if (prod.rhs.empty()) {
340  event_q.push(Event(FIRST_NULL, symbol));
341  break;
342  }
343  }
344  }
345  }
346  Graph dependers2dependees = get_symbol_graph(grammar, lhs2prods);
347  Graph dependees2dependers = make_transpose(dependers2dependees);
348  while (!event_q.empty()) {
349  Event event = event_q.front(); event_q.pop();
350  int added_symb = event.added_symbol;
351  int dependee = event.dependee;
352  FirstSet& dependee_firsts = at(first_sets, dependee);
353  /* hopefully we don't get too many duplicate events piled up... */
354  if (dependee_firsts.count(added_symb)) continue;
355  dependee_firsts.insert(added_symb);
356  for (int i = 0; i < count_edges(dependees2dependers, dependee); ++i) {
357  int depender = at(dependees2dependers, dependee, i);
358  TEUCHOS_ASSERT(is_nonterminal(grammar, depender));
359  const FirstSet& depender_firsts = at(first_sets, depender);
360  for (int j = 0; j < count_edges(lhs2prods, depender); ++j) {
361  int prod_i = at(lhs2prods, depender, j);
362  const Grammar::Production& prod = at(grammar.productions, prod_i);
363  FirstSet rhs_first_set = get_first_set_of_string(prod.rhs, first_sets);
364  for (FirstSet::iterator it = rhs_first_set.begin();
365  it != rhs_first_set.end(); ++it) {
366  int rhs_first_symb = *it;
367  if (!depender_firsts.count(rhs_first_symb)) {
368  event_q.push(Event(rhs_first_symb, depender));
369  }
370  }
371  }
372  }
373  }
374  if (verbose) {
375  for (int symb = 0; symb < nsymbols; ++symb) {
376  const std::string& symb_name = at(grammar.symbol_names, symb);
377  std::cerr << "FIRST(" << symb_name << ") = {";
378  const FirstSet& c = at(first_sets, symb);
379  for (FirstSet::const_iterator it = c.begin(); it != c.end(); ++it) {
380  if (it != c.begin()) std::cerr << ", ";
381  int first_symb = *it;
382  if (first_symb == FIRST_NULL) {
383  std::cerr << "null";
384  } else {
385  const std::string& first_name = at(grammar.symbol_names, first_symb);
386  std::cerr << first_name;
387  }
388  }
389  std::cerr << "}\n";
390  }
391  std::cerr << '\n';
392  }
393  return first_sets;
394 }
395 
396 StateConfigs form_state_configs(StatesInProgress const& states) {
397  StateConfigs out;
398  for (int i = 0; i < size(states); ++i) {
399  StateInProgress& state = *at(states, i);
400  for (int j = 0; j < size(state.configs); ++j) {
401  out.push_back(StateConfig(i, j));
402  }
403  }
404  return out;
405 }
406 
407 Graph form_states_to_state_configs(StateConfigs const& scs,
408  StatesInProgress const& states) {
409  Graph out = make_graph_with_nnodes(size(states));
410  for (int i = 0; i < size(scs); ++i) {
411  const StateConfig& sc = at(scs, i);
412  at(out, sc.state).push_back(i);
413  }
414  return out;
415 }
416 
417 static std::string escape_dot(std::string const& s) {
418  std::string out;
419  for (std::size_t i = 0; i < s.size(); ++i) {
420  char c = s[i];
421  if (c == '\\' || c == '|' || c == '\"' || c == '<' || c == '>') {
422  out.push_back('\\');
423  out.push_back(c);
424  } else if (c == '.') {
425  out += "\'.\'";
426  } else {
427  out.push_back(c);
428  }
429  }
430  return out;
431 }
432 
433 void print_graphviz(
434  std::string const& filepath,
435  ParserInProgress const& pip,
436  bool verbose,
437  std::ostream& os
438  ) {
439  const StatesInProgress& sips = pip.states;
440  const Configs& cs = pip.configs;
441  GrammarPtr grammar = pip.grammar;
442  const Graph& states2scs = pip.states2state_configs;
443  os << "writing GraphViz file \"" << filepath << "\"\n";
444  os << "process with:\n";
445  os << " dot -Tpdf -o \"" << filepath << ".pdf\" \"" << filepath << "\"\n";
446  std::ofstream file(filepath.c_str());
447  TEUCHOS_ASSERT(file.is_open());
448  file << "digraph {\n";
449  file << "graph [\n";
450  file << "rankdir = \"LR\"\n";
451  file << "]\n";
452  for (int s_i = 0; s_i < size(sips); ++s_i) {
453  const StateInProgress& state = *at(sips, s_i);
454  file << s_i << " [\n";
455  file << "label = \"";
456  file << "State " << s_i << "\\l";
457  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
458  int c_i = at(state.configs, cis_i);
459  const Config& config = at(cs, c_i);
460  const Grammar::Production& prod = at(grammar->productions, config.production);
461  int sc_i = at(states2scs, s_i, cis_i);
462  file << sc_i << ": ";
463  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
464  file << escape_dot(lhs_name) << " ::= ";
465  for (int rhs_i = 0; rhs_i <= size(prod.rhs); ++rhs_i) {
466  if (rhs_i == config.dot) file << " .";
467  if (rhs_i < size(prod.rhs)) {
468  int rhs_symb = at(prod.rhs, rhs_i);
469  const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
470  file << " " << escape_dot(rhs_symb_name);
471  }
472  }
473  if (config.dot == size(prod.rhs)) {
474  file << ", \\{";
475  bool found = false;
476  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
477  const ActionInProgress& action = at(state.actions, a_i);
478  if (action.action.kind == ACTION_REDUCE &&
479  action.action.production == config.production) {
480  found = true;
481  const Context& ac = action.context;
482  for (Context::const_iterator it = ac.begin(); it != ac.end(); ++it) {
483  if (it != ac.begin()) file << ", ";
484  int symb = *it;
485  const std::string& symb_name = at(grammar->symbol_names, symb);
486  file << escape_dot(symb_name);
487  }
488  }
489  }
490  TEUCHOS_TEST_FOR_EXCEPTION(!found, std::logic_error,
491  "BUG: missing reduce action in state " << s_i << " !!!\n");
492  file << "\\}";
493  }
494  file << "\\l";
495  }
496  file << "\"\n";
497  file << "shape = \"record\"\n";
498  file << "]\n";
499  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
500  const ActionInProgress& action = at(state.actions, a_i);
501  if (action.action.kind == ACTION_SHIFT) {
502  int symb = *(action.context.begin());
503  const std::string& symb_name = at(grammar->symbol_names, symb);
504  int next = action.action.next_state;
505  file << s_i << " -> " << next << " [\n";
506  file << "label = \"" << escape_dot(symb_name) << "\"\n";
507  file << "]\n";
508  }
509  }
510  }
511  file << "}\n";
512 }
513 
514 static Graph make_immediate_predecessor_graph(
515  StateConfigs const& scs,
516  StatesInProgress const& states,
517  Graph const& states2scs,
518  Configs const& cs,
519  GrammarPtr grammar) {
520  Graph out = make_graph_with_nnodes(size(scs));
521  for (int s_i = 0; s_i < size(states); ++s_i) {
522  const StateInProgress& state = *at(states, s_i);
523  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
524  int config_i = at(state.configs, cis_i);
525  const Config& config = at(cs, config_i);
526  const Grammar::Production& prod = at(grammar->productions, config.production);
527  int dot = config.dot;
528  if (dot == size(prod.rhs)) continue;
529  int s = at(prod.rhs, dot);
530  if (is_terminal(*grammar, s)) continue;
531  for (int cis_j = 0; cis_j < size(state.configs); ++cis_j) {
532  int config_j = at(state.configs, cis_j);
533  const Config& config2 = at(cs, config_j);
534  const Grammar::Production& prod2 = at(grammar->productions, config2.production);
535  if (prod2.lhs == s) {
536  int sc_i = at(states2scs, s_i, cis_i);
537  int sc_j = at(states2scs, s_i, cis_j);
538  add_edge(out, sc_j, sc_i);
539  }
540  }
541  }
542  }
543  return out;
544 }
545 
546 static Graph find_transition_predecessors(
547  StateConfigs const& scs,
548  StatesInProgress const& states,
549  Graph const& states2scs,
550  Configs const& cs,
551  GrammarPtr grammar) {
552  Graph out = make_graph_with_nnodes(size(scs));
553  for (int state_i = 0; state_i < size(states); ++state_i) {
554  const StateInProgress& state = *at(states, state_i);
555  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
556  const ActionInProgress& action = at(state.actions, a_i);
557  if (action.action.kind != ACTION_SHIFT) continue;
558  TEUCHOS_ASSERT(action.context.size() == 1);
559  int symbol = *(action.context.begin());
560  int state_j = action.action.next_state;
561  const StateInProgress& state2 = *at(states, state_j);
562  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
563  int config_i = at(state.configs, cis_i);
564  const Config& config = at(cs, config_i);
565  for (int cis_j = 0; cis_j < size(state2.configs); ++cis_j) {
566  int config_j = at(state2.configs, cis_j);
567  const Config& config2 = at(cs, config_j);
568  if (config.production == config2.production &&
569  config.dot + 1 == config2.dot) {
570  const Grammar::Production& prod = at(grammar->productions, config.production);
571  int rhs_symbol = at(prod.rhs, config.dot);
572  if (rhs_symbol == symbol) {
573  int sc_i = at(states2scs, state_i, cis_i);
574  int sc_j = at(states2scs, state_j, cis_j);
575  add_edge(out, sc_j, sc_i);
576  }
577  }
578  }
579  }
580  }
581  }
582  return out;
583 }
584 
585 static Graph make_originator_graph(
586  StateConfigs const& scs,
587  StatesInProgress const& states,
588  Graph const& states2scs,
589  Configs const& cs,
590  GrammarPtr grammar) {
591  Graph out = make_graph_with_nnodes(size(scs));
592  Graph ipg = make_immediate_predecessor_graph(
593  scs, states, states2scs, cs, grammar);
594  Graph tpg = find_transition_predecessors(
595  scs, states, states2scs, cs, grammar);
596  for (int sc_i = 0; sc_i < size(scs); ++sc_i) {
597  std::set<int> originators;
598  /* breadth-first search through the Transition
599  Precessor Graph (tpg), followed by a single hop
600  along the Immediate Predecessor Graph (ipg) */
601  std::queue<int> tpq;
602  std::set<int> tps;
603  tpq.push(sc_i);
604  tps.insert(sc_i);
605  while (!tpq.empty()) {
606  int tpp = tpq.front(); tpq.pop();
607  for (int i = 0; i < count_edges(tpg, tpp); ++i) {
608  int tpc = at(tpg, tpp, i);
609  if (tps.count(tpc)) continue;
610  tpq.push(tpc);
611  tps.insert(tpc);
612  }
613  for (int i = 0; i < count_edges(ipg, tpp); ++i) {
614  int ip_i = at(ipg, tpp, i);
615  originators.insert(ip_i);
616  }
617  }
618  at(out, sc_i).assign(originators.begin(), originators.end());
619  }
620  return out;
621 }
622 
623 static std::vector<int> get_follow_string(
624  int sc_addr,
625  StateConfigs const& scs,
626  StatesInProgress const& states,
627  Configs const& cs,
628  GrammarPtr grammar) {
629  const StateConfig& sc = at(scs, sc_addr);
630  const StateInProgress& state = *at(states, sc.state);
631  int config_i = at(state.configs, sc.config_in_state);
632  const Config& config = at(cs, config_i);
633  const Grammar::Production& prod = at(grammar->productions, config.production);
634  int out_size = size(prod.rhs) - (config.dot + 1);
635  std::vector<int> out;
636  /* out_size can be negative */
637  if (out_size < 1) return out;
638  reserve(out, out_size);
639  for (int i = config.dot + 1; i < size(prod.rhs); ++i) {
640  out.push_back(at(prod.rhs, i));
641  }
642  return out;
643 }
644 
645 static void print_string(std::vector<int> const& str, GrammarPtr grammar) {
646  std::cerr << "\"";
647  for (int i = 0; i < size(str); ++i) {
648  int symb = at(str, i);
649  const std::string& symb_name = at(grammar->symbol_names, symb);
650  std::cerr << symb_name;
651  }
652  std::cerr << "\"";
653 }
654 
655 static bool has_non_null_terminal_descendant(FirstSet const& first_set) {
656  if (first_set.empty()) return false;
657  if (first_set.size() > 1) return true;
658  return *(first_set.begin()) != FIRST_NULL;
659 }
660 
661 static Context get_contexts(FirstSet first_set) {
662  FirstSet::iterator it = first_set.find(FIRST_NULL);
663  if (it != first_set.end()) first_set.erase(it);
664  return first_set;
665 }
666 
667 enum { MARKER = -433 };
668 enum { ZERO = -100 }; // actual zero is a valid index for us
669 
670 static void print_stack(std::vector<int> const& stack) {
671  for (int i = 0; i < size(stack); ++i) {
672  int symb = at(stack, i);
673  if (symb == MARKER) std::cerr << " M";
674  else if (symb == ZERO) std::cerr << " Z";
675  else std::cerr << " " << symb;
676  }
677  std::cerr << '\n';
678 }
679 
680 static void move_markers(
681  std::vector<int>& lane,
682  std::vector<int>& in_lane,
683  int zeta_prime_addr,
684  int zeta_pointer,
685  bool tests_failed
686  ) {
687  int loc_of_zeta_prime = at(in_lane, zeta_prime_addr);
688  TEUCHOS_ASSERT(loc_of_zeta_prime != -1);
689  int r = 0;
690  for (int i = loc_of_zeta_prime + 1; i < zeta_pointer; ++i) {
691  if (at(lane, i) == MARKER) {
692  ++r;
693  at(lane, i) = ZERO;
694  }
695  }
696  int top_addr = -1;
697  if (tests_failed) {
698  top_addr = lane.back();
699  lane.resize(lane.size() - 1); // pop
700  }
701  for (int i = 0; i < r; ++i) lane.push_back(MARKER);
702  if (tests_failed) {
703  lane.push_back(top_addr);
704  at(in_lane, top_addr) = size(lane) - 1;
705  }
706 }
707 
708 typedef std::vector<Context> Contexts;
709 
710 static void context_adding_routine(
711  std::vector<int> const& lane,
712  int zeta_pointer,
713  Context& contexts_generated,
714  Contexts& contexts,
715  bool verbose,
716  GrammarPtr grammar
717  ) {
718  if (verbose) {
719  std::cerr << " CONTEXT ADDING ROUTINE\n";
720  std::cerr << " LANE:";
721  print_stack(lane);
722  std::cerr << " $\\zeta$-POINTER = " << zeta_pointer << '\n';
723  }
724  for (int r = zeta_pointer; r >= 0 && (!contexts_generated.empty()); --r) {
725  int v_r = at(lane, r);
726  if (verbose) std::cerr << " r = " << r << ", $v_r$ = ";
727  if (v_r < 0) {
728  if (verbose) {
729  if (v_r == MARKER) std::cerr << "marker\n";
730  else if (v_r == ZERO) std::cerr << "zero\n";
731  }
732  continue;
733  }
734  int tau_r_addr = v_r;
735  if (verbose) {
736  std::cerr << "$\\tau_r$ = " << tau_r_addr << '\n';
737  std::cerr << " CONTEXTS_GENERATED = ";
738  print_set(contexts_generated, *grammar);
739  std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
740  print_set(at(contexts, tau_r_addr), *grammar);
741  std::cerr << "\n CONTEXTS_GENERATED <- CONTEXTS_GENERATED - CONTEXTS_$\\tau_r$";
742  }
743  subtract_from(contexts_generated, at(contexts, tau_r_addr));
744  if (verbose) {
745  std::cerr << "\n CONTEXTS_GENERATED = ";
746  print_set(contexts_generated, *grammar);
747  std::cerr << "\n CONTEXTS_$\\tau_r$ <- CONTEXTS_$\\tau_r$ U CONTEXTS_GENERATED";
748  }
749  unite_with(at(contexts, tau_r_addr), contexts_generated);
750  if (verbose) {
751  std::cerr << "\n CONTEXTS_$\\tau_r$ = ";
752  print_set(at(contexts, tau_r_addr), *grammar);
753  std::cerr << "\n";
754  }
755  }
756 }
757 
758 static void deal_with_tests_failed(
759  int& num_originators_failed,
760  int& first_originator_failed,
761  int zeta_prime_addr,
762  bool& tests_failed,
763  std::vector<int>& lane,
764  std::vector<int>& in_lane,
765  int zeta_addr,
766  std::vector<int>& stack,
767  bool verbose
768  ) {
769  if (verbose) std::cerr << " Dealing with test failures\n";
770  if (num_originators_failed == 0) {
771  if (verbose) std::cerr << " " << zeta_prime_addr << " is the first originator of " << zeta_addr << " to fail the tests\n";
772  first_originator_failed = zeta_prime_addr;
773  if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto LANE:\n ";
774  lane.push_back(zeta_prime_addr);
775  if (verbose) print_stack(lane);
776  at(in_lane, zeta_prime_addr) = size(lane) - 1;
777  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") <- ON\n";
778  tests_failed = true;
779  if (verbose) std::cerr << " TESTS_FAILED <- ON\n";
780  } else if (num_originators_failed == 1) {
781  if (verbose) std::cerr << " " << zeta_prime_addr << " is the second originator of " << zeta_addr << " to fail the tests\n";
782  int zeta_double_prime_addr = first_originator_failed;
783  if (verbose) std::cerr << " the first was " << zeta_double_prime_addr << '\n';
784  TEUCHOS_ASSERT(at(lane, size(lane) - 1) == zeta_double_prime_addr);
785  TEUCHOS_ASSERT(at(lane, size(lane) - 2) == zeta_addr);
786  if (verbose) std::cerr << " pop LANE, push {marker, " << zeta_double_prime_addr << "} onto it:\n ";
787  lane.resize(lane.size() - 1);
788  lane.push_back(MARKER);
789  lane.push_back(zeta_double_prime_addr);
790  if (verbose) print_stack(lane);
791  if (verbose) std::cerr << " push {marker, " << zeta_prime_addr << "} onto STACK:\n ";
792  stack.push_back(MARKER);
793  stack.push_back(zeta_prime_addr);
794  if (verbose) print_stack(stack);
795  } else {
796  if (verbose) std::cerr << " " << zeta_prime_addr << " is the third or later originator of " << zeta_addr << " to fail the tests\n";
797  if (verbose) std::cerr << " pushing " << zeta_prime_addr << " onto STACK:\n ";
798  stack.push_back(zeta_prime_addr);
799  if (verbose) print_stack(stack);
800  }
801  ++num_originators_failed;
802 }
803 
804 static void heuristic_propagation_of_context_sets(
805  int tau_addr,
806  Contexts& contexts,
807  std::vector<bool>& complete,
808  StateConfigs const& scs,
809  StatesInProgress const& states,
810  Graph const& states2scs,
811  Configs const& cs,
812  GrammarPtr grammar
813  ) {
814  const StateConfig& tau = at(scs, tau_addr);
815  const StateInProgress& state = *at(states, tau.state);
816  int config_i = at(state.configs, tau.config_in_state);
817  const Config& config = at(cs, config_i);
818  if (config.dot != 0) return;
819  const Grammar::Production& prod = at(grammar->productions, config.production);
820  for (int cis_j = 0; cis_j < size(state.configs); ++cis_j) {
821  int config_j = at(state.configs, cis_j);
822  if (config_j == config_i) continue;
823  const Config& config2 = at(cs, config_j);
824  if (config2.dot != 0) continue;
825  const Grammar::Production& prod2 = at(grammar->productions, config2.production);
826  if (prod.lhs != prod2.lhs) continue;
827  int tau_prime_addr = at(states2scs, tau.state, cis_j);
828  at(contexts, tau_prime_addr) = at(contexts, tau_addr);
829  at(complete, tau_prime_addr) = true;
830  }
831 }
832 
833 /* Here it is! The magical algorithm described by a flowchart in
834  Figure 7 of David Pager's paper. */
835 static void compute_context_set(
836  int zeta_j_addr,
837  Contexts& contexts,
838  std::vector<bool>& complete,
839  StateConfigs const& scs,
840  Graph const& originator_graph,
841  StatesInProgress const& states,
842  Graph const& states2scs,
843  Configs const& cs,
844  std::vector<FirstSet> const& first_sets,
845  GrammarPtr grammar,
846  bool verbose
847  ) {
848  if (verbose) std::cerr << "Computing context set for $\\zeta_j$ = " << zeta_j_addr << "...\n";
849  if (verbose) std::cerr << "BEGIN PROGRAM\n";
850  if (at(complete, zeta_j_addr)) {
851  if (verbose) std::cerr << zeta_j_addr << " was already complete!\nEND PROGRAM\n\n";
852  return;
853  }
854  std::vector<int> stack;
855  // need random access, inner insert, which std::stack doesn't provide
856  std::vector<int> lane;
857  std::vector<int> in_lane = make_vector<int>(size(scs), -1);
858  lane.push_back(zeta_j_addr);
859  at(in_lane, zeta_j_addr) = size(lane) - 1;
860  bool tests_failed = false;
861  Context contexts_generated;
862  if (verbose) {
863  std::cerr << "Initial LANE:";
864  print_stack(lane);
865  }
866  while (true) {
867  TEUCHOS_ASSERT(!lane.empty());
868  int zeta_addr = lane.back();
869  if (verbose) {
870  std::cerr << "Top of LANE is $\\zeta$ = " << zeta_addr << '\n';
871  }
872  int zeta_pointer = size(lane) - 1;
873  if (verbose) std::cerr << "$\\zeta$-POINTER <- " << zeta_pointer << '\n';
874  int num_originators_failed = 0;
875  int first_originator_failed = -1;
876  if (verbose) std::cerr << "DO_LOOP:\n";
877  /* DO_LOOP */
878  for (int i = 0; i < count_edges(originator_graph, zeta_addr); ++i) {
879  int zeta_prime_addr = at(originator_graph, zeta_addr, i);
880  if (verbose) {
881  std::cerr << "Next originator of $\\zeta$ = " << zeta_addr << " is $\\zeta'$ = " << zeta_prime_addr << '\n';
882  }
883  std::vector<int> gamma = get_follow_string(zeta_prime_addr, scs, states, cs, grammar);
884  if (verbose) {
885  std::cerr << " FOLLOW string of $\\zeta'$ = " << zeta_prime_addr << " is ";
886  print_string(gamma, grammar);
887  std::cerr << '\n';
888  }
889  FirstSet gamma_first = get_first_set_of_string(gamma, first_sets);
890  if (verbose) {
891  std::cerr << " FIRST set of ";
892  print_string(gamma, grammar);
893  std::cerr << " is ";
894  print_set(gamma_first, *grammar);
895  std::cerr << "\n";
896  }
897  if (has_non_null_terminal_descendant(gamma_first)) { // test A
898  if (verbose) {
899  std::cerr << " ";
900  print_string(gamma, grammar);
901  std::cerr << " has a non-null terminal descendant\n";
902  }
903  contexts_generated = get_contexts(gamma_first);
904  if (verbose) {
905  std::cerr << " CONTEXTS_GENERATED = ";
906  print_set(contexts_generated, *grammar);
907  std::cerr << " = 1-heads of non-null descendants of ";
908  print_string(gamma, grammar);
909  std::cerr << '\n';
910  }
911  if (gamma_first.count(FIRST_NULL)) {
912  if (verbose) {
913  std::cerr << " ";
914  print_string(gamma, grammar);
915  std::cerr << " has a null terminal descendant\n";
916  }
917  if (at(complete, zeta_prime_addr)) {
918  unite_with(contexts_generated, at(contexts, zeta_prime_addr));
919  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
920  verbose, grammar);
921  } else if (-1 == at(in_lane, zeta_prime_addr)) {
922  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
923  verbose, grammar);
924  /* TRACE_FURTHER */
925  deal_with_tests_failed(num_originators_failed, first_originator_failed,
926  zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
927  verbose);
928  } else {
929  throw ParserBuildFail("error: grammar is ambiguous.\n");
930  }
931  } else {
932  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
933  verbose, grammar);
934  }
935  } else {
936  if (verbose) {
937  std::cerr << " ";
938  print_string(gamma, grammar);
939  std::cerr << " does not have a non-null terminal descendant\n";
940  }
941  if (at(complete, zeta_prime_addr)) { // test B
942  if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is ON\n";
943  contexts_generated = at(contexts, zeta_prime_addr);
944  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
945  verbose, grammar);
946  } else {
947  if (verbose) std::cerr << " COMPLETE(" << zeta_prime_addr << ") is OFF\n";
948  if (-1 != at(in_lane, zeta_prime_addr)) { // test C
949  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is ON\n";
950  move_markers(lane, in_lane, zeta_prime_addr, zeta_pointer, tests_failed);
951  contexts_generated = at(contexts, zeta_prime_addr);
952  context_adding_routine(lane, zeta_pointer, contexts_generated, contexts,
953  verbose, grammar);
954  } else {
955  if (verbose) std::cerr << " IN_LANE(" << zeta_prime_addr << ") is OFF\n";
956  deal_with_tests_failed(num_originators_failed, first_originator_failed,
957  zeta_prime_addr, tests_failed, lane, in_lane, zeta_addr, stack,
958  verbose);
959  }
960  }
961  }
962  } /* END DO_LOOP */
963  if (verbose) std::cerr << "END DO_LOOP\n";
964  if (tests_failed) {
965  if (verbose) {
966  std::cerr << " TESTS_FAILED was on, turning it off and going to next configuration\n";
967  }
968  tests_failed = false;
969  continue;
970  }
971  bool keep_lane_popping = true;
972  if (verbose) std::cerr << " Start LANE popping\n";
973  while (keep_lane_popping) { // LANE popping loop
974  TEUCHOS_ASSERT(!lane.empty());
975  if (verbose) {
976  std::cerr << " LANE:";
977  print_stack(lane);
978  }
979  if (at(lane, size(lane) - 1) == MARKER) {
980  if (verbose) std::cerr << " Top of LANE is a marker\n";
981  if (verbose) std::cerr << " Start STACK popping\n";
982  while (true) { // STACK popping loop
983  TEUCHOS_ASSERT(!stack.empty());
984  if (verbose) {
985  std::cerr << " STACK:";
986  print_stack(stack);
987  std::cerr << " LANE:";
988  print_stack(lane);
989  }
990  if (stack.back() == MARKER) {
991  if (verbose) std::cerr << " Top of STACK is a marker, pop STACK and LANE\n";
992  resize(stack, size(stack) - 1);
993  resize(lane, size(lane) - 1);
994  break; // out of STACK popping, back into LANE popping
995  } else if (at(complete, stack.back())) {
996  if (verbose) std::cerr << " Top of STACK is has COMPLETE flag, pop STACK\n";
997  resize(stack, size(stack) - 1);
998  // back into STACK popping
999  } else {
1000  int addr = stack.back();
1001  if (verbose) std::cerr << " Top of STACK is " << addr << ", pop STACK\n";
1002  resize(stack, size(stack) - 1);
1003  if (verbose) std::cerr << " Push " << addr << " onto LANE\n";
1004  lane.push_back(addr);
1005  if (verbose) std::cerr << " IN_LANE(" << addr << ") <- ON\n";
1006  at(in_lane, addr) = size(lane) - 1;
1007  keep_lane_popping = false;
1008  break; // out of STACK and LANE popping, into top-level loop
1009  } // end STACK top checks
1010  } // end STACK popping loop
1011  } else if (at(lane, size(lane) - 1) == ZERO) {
1012  if (verbose) std::cerr << " Top of LANE is a zero\n";
1013  if (verbose) std::cerr << " Pop LANE\n";
1014  resize(lane, size(lane) - 1); // pop LANE
1015  // back to top of LANE popping loop
1016  } else { // top of LANE neither marker nor zero
1017  int tau_addr = lane.back();
1018  if (verbose) std::cerr << " Top of LANE is $\\tau$ = " << tau_addr << "\n";
1019  at(in_lane, tau_addr) = -1;
1020  if (verbose) std::cerr << " IN_LANE(" << tau_addr << ") <- OFF\n";
1021  at(complete, tau_addr) = true;
1022  if (verbose) std::cerr << " COMPLETE(" << tau_addr << ") <- ON\n";
1023  if (verbose) std::cerr << " HEURISTIC PROPAGATION OF CONTEXT SETS\n";
1024  heuristic_propagation_of_context_sets(
1025  tau_addr, contexts, complete,
1026  scs, states, states2scs, cs, grammar);
1027  if (size(lane) == 1 && at(lane, 0) == zeta_j_addr) {
1028  if (verbose) std::cerr << "END PROGRAM\n\n";
1029  return;
1030  }
1031  if (verbose) std::cerr << " Pop LANE\n";
1032  resize(lane, size(lane) - 1); // pop LANE
1033  // back to top of LANE popping loop
1034  } // end top of LANE checks
1035  } // end LANE popping loop
1036  } // end top-level while(1) loop
1037 }
1038 
1039 static std::vector<bool> determine_adequate_states(
1040  StatesInProgress const& states,
1041  GrammarPtr grammar,
1042  bool verbose,
1043  std::ostream& os) {
1044  std::vector<bool> out = make_vector<bool>(size(states));
1045  for (int s_i = 0; s_i < size(states); ++s_i) {
1046  const StateInProgress& state = *at(states, s_i);
1047  bool state_is_adequate = true;
1048  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
1049  const ActionInProgress& action = at(state.actions, a_i);
1050  if (action.action.kind == ACTION_SHIFT &&
1051  is_nonterminal(*grammar, *(action.context.begin()))) {
1052  continue;
1053  }
1054  for (int a_j = a_i + 1; a_j < size(state.actions); ++a_j) {
1055  const ActionInProgress& action2 = at(state.actions, a_j);
1056  if (action2.action.kind == ACTION_SHIFT &&
1057  is_nonterminal(*grammar, *(action2.context.begin()))) {
1058  continue;
1059  }
1060  if (intersects(action2.context, action.context)) {
1061  if (verbose) {
1062  const ActionInProgress* ap1 = &action;
1063  const ActionInProgress* ap2 = &action2;
1064  if (ap1->action.kind == ACTION_SHIFT) {
1065  std::swap(ap1, ap2);
1066  }
1067  TEUCHOS_ASSERT(ap1->action.kind == ACTION_REDUCE);
1068  os << "shift-reduce conflict in state " << s_i << ":\n";
1069  os << "reduce ";
1070  const Grammar::Production& prod = at(grammar->productions, ap1->action.production);
1071  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
1072  os << lhs_name << " ::=";
1073  for (int rhs_i = 0; rhs_i < size(prod.rhs); ++rhs_i) {
1074  int rhs_symb = at(prod.rhs, rhs_i);
1075  const std::string& rhs_symb_name = at(grammar->symbol_names, rhs_symb);
1076  os << " " << rhs_symb_name;
1077  }
1078  int shift_symb = *(ap2->context.begin());
1079  const std::string& shift_name = at(grammar->symbol_names, shift_symb);
1080  os << "\nshift " << shift_name << '\n';
1081  }
1082  state_is_adequate = false;
1083  break;
1084  }
1085  }
1086  if (!state_is_adequate) break;
1087  }
1088  at(out, s_i) = state_is_adequate;
1089  }
1090  if (verbose) os << '\n';
1091  return out;
1092 }
1093 
1094 ParserInProgress draft_lalr1_parser(GrammarPtr grammar, bool verbose) {
1095  ParserInProgress out;
1096  Configs& cs = out.configs;
1097  StatesInProgress& states = out.states;
1098  StateConfigs& scs = out.state_configs;
1099  Graph& states2scs = out.states2state_configs;
1100  out.grammar = grammar;
1101  cs = make_configs(*grammar);
1102  Graph lhs2cs = get_left_hand_sides_to_start_configs(cs, *grammar);
1103  if (verbose) std::cerr << "Building LR(0) parser\n";
1104  states = make_lr0_parser(cs, *grammar, lhs2cs);
1105  scs = form_state_configs(states);
1106  states2scs = form_states_to_state_configs(scs, states);
1107  if (verbose) print_graphviz("lr0.dot", out, true, std::cerr);
1108  if (verbose) std::cerr << "Checking adequacy of LR(0) machine\n";
1109  std::vector<bool> adequate = determine_adequate_states(states, grammar, verbose,
1110  std::cerr);
1111  if (*(std::min_element(adequate.begin(), adequate.end()))) {
1112  if (verbose) std::cerr << "The grammar is LR(0)!\n";
1113  return out;
1114  }
1115  std::vector<bool> complete = make_vector<bool>(size(scs), false);
1116  std::vector<Context> contexts = make_vector<Context>(size(scs));
1117  int accept_prod_i = get_accept_production(*grammar);
1118  /* initialize the accepting state-configs as described in
1119  footnote 8 at the bottom of page 37 */
1120  for (int sc_i = 0; sc_i < size(scs); ++sc_i) {
1121  StateConfig& sc = at(scs, sc_i);
1122  StateInProgress& state = *at(states, sc.state);
1123  int config_i = at(state.configs, sc.config_in_state);
1124  Config& config = at(cs, config_i);
1125  if (config.production == accept_prod_i) {
1126  at(complete, sc_i) = true;
1127  at(contexts, sc_i).insert(get_end_terminal(*grammar));
1128  }
1129  }
1130  Graph og = make_originator_graph(scs, states, states2scs, cs, grammar);
1131  if (verbose) std::cerr << "Originator Graph:\n";
1132  if (verbose) std::cerr << og << '\n';
1133  std::vector<FirstSet> first_sets = compute_first_sets(*grammar, verbose);
1134  /* compute context sets for all state-configs associated with reduction
1135  actions that are part of an inadequate state */
1136  for (int s_i = 0; s_i < size(states); ++s_i) {
1137  if (at(adequate, s_i)) continue;
1138  StateInProgress& state = *at(states, s_i);
1139  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
1140  int config_i = at(state.configs, cis_i);
1141  const Config& config = at(cs, config_i);
1142  const Grammar::Production& prod = at(grammar->productions, config.production);
1143  if (config.dot != size(prod.rhs)) continue;
1144  int zeta_j_addr = at(states2scs, s_i, cis_i);
1145  compute_context_set(zeta_j_addr, contexts, complete, scs,
1146  og, states, states2scs, cs, first_sets, grammar, verbose);
1147  }
1148  }
1149  /* update the context sets for all reduction state-configs
1150  which are marked complete, even if they aren't in inadequate states */
1151  for (int s_i = 0; s_i < size(states); ++s_i) {
1152  StateInProgress& state = *at(states, s_i);
1153  for (int cis_i = 0; cis_i < size(state.configs); ++cis_i) {
1154  int sc_i = at(states2scs, s_i, cis_i);
1155  if (!at(complete, sc_i)) continue;
1156  int config_i = at(state.configs, cis_i);
1157  Config& config = at(cs, config_i);
1158  const Grammar::Production& prod = at(grammar->productions, config.production);
1159  if (config.dot != size(prod.rhs)) continue;
1160  for (int a_i = 0; a_i < size(state.actions); ++a_i) {
1161  ActionInProgress& action = at(state.actions, a_i);
1162  if (action.action.kind == ACTION_REDUCE &&
1163  action.action.production == config.production) {
1164  action.context = at(contexts, sc_i);
1165  }
1166  }
1167  }
1168  }
1169  if (verbose) std::cerr << "Checking adequacy of LALR(1) machine\n";
1170  adequate = determine_adequate_states(states, grammar, verbose, std::cerr);
1171  if (!(*(std::min_element(adequate.begin(), adequate.end())))) {
1172  std::stringstream ss;
1173  ss << "error: The grammar is not LALR(1).\n";
1174  determine_adequate_states(states, grammar, true, ss);
1175  print_graphviz("error.dot", out, true, ss);
1176  std::string s = ss.str();
1177  throw ParserBuildFail(s);
1178  }
1179  if (verbose) std::cerr << "The grammar is LALR(1)!\n";
1180  if (verbose) print_graphviz("lalr1.dot", out, true, std::cerr);
1181  return out;
1182 }
1183 
1184 Parser accept_parser(ParserInProgress const& pip) {
1185  const StatesInProgress& sips = pip.states;
1186  GrammarPtr grammar = pip.grammar;
1187  Parser out(grammar, size(sips));
1188  for (int s_i = 0; s_i < size(sips); ++s_i) {
1189  add_state(out);
1190  }
1191  for (int s_i = 0; s_i < size(sips); ++s_i) {
1192  const StateInProgress& sip = *at(sips, s_i);
1193  for (int a_i = 0; a_i < size(sip.actions); ++a_i) {
1194  const ActionInProgress& action = at(sip.actions, a_i);
1195  if (action.action.kind == ACTION_SHIFT &&
1196  is_nonterminal(*grammar, *(action.context.begin()))) {
1197  int nt = as_nonterminal(*grammar, *(action.context.begin()));
1198  add_nonterminal_action(out, s_i, nt, action.action.next_state);
1199  } else {
1200  for (Context::const_iterator it = action.context.begin();
1201  it != action.context.end(); ++it) {
1202  int terminal = *it;
1203  TEUCHOS_ASSERT(is_terminal(*grammar, terminal));
1204  add_terminal_action(out, s_i, terminal, action.action);
1205  }
1206  }
1207  }
1208  }
1209  return out;
1210 }
1211 
1212 ParserBuildFail::ParserBuildFail(const std::string& msg):
1213  std::invalid_argument(msg) {
1214 }
1215 
1216 Parser make_lalr1_parser(GrammarPtr grammar, bool verbose) {
1217  ParserInProgress pip = draft_lalr1_parser(grammar, verbose);
1218  return accept_parser(pip);
1219 }
1220 
1221 }
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
int size(const Comm< Ordinal > &comm)
Get the number of processes in the communicator.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
TypeTo as(const TypeFrom &t)
Convert from one value type to another.
Smart reference counting pointer class for automatic garbage collection.
#define TEUCHOS_ASSERT(assertion_test)
This macro is throws when an assert fails.
void swap(Teuchos::any &a, Teuchos::any &b)
Special swap for other code to find via Argument Dependent Lookup.