Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_Reader.cpp
1 #include "Teuchos_Reader.hpp"
2 
3 #include <iostream>
4 #include <sstream>
5 #include <fstream>
6 #include <ios>
7 #include <cstdlib>
8 #include <set>
9 
10 #include "Teuchos_string.hpp"
11 #include "Teuchos_vector.hpp"
12 #include "Teuchos_Parser.hpp"
13 
14 namespace Teuchos {
15 
16 namespace {
17 
/* Writes one line consisting of a single '^' placed under column `pos` of
   the text `above`. Tabs in `above` are repeated as tabs so that the caret
   lines up regardless of tab width; every other character becomes a space.
   `pos` may not exceed above.size(): std::string::at() throws otherwise. */
void print_indicator(std::ostream& os, std::string const& above, std::size_t pos) {
  std::size_t col = 0;
  while (col < pos) {
    const bool is_tab = (above.at(col) == '\t');
    if (is_tab) {
      os << '\t';
    } else {
      os << ' ';
    }
    ++col;
  }
  os << "^\n";
}
25 
/* Writes one line that underlines columns [start, end) of the text `above`
   with '~' characters. The leading padding mirrors `above`: tabs stay tabs,
   anything else becomes a space. Nothing is underlined when end <= start.
   `start` may not exceed above.size(): std::string::at() throws otherwise. */
void print_underline(std::ostream& os, std::string const& above, std::size_t start, std::size_t end) {
  std::size_t col = 0;
  /* padding up to the first underlined column */
  while (col < start) {
    const bool is_tab = (above.at(col) == '\t');
    if (is_tab) {
      os << '\t';
    } else {
      os << ' ';
    }
    ++col;
  }
  /* the underline itself */
  for (col = start; col < end; ++col) {
    os << '~';
  }
  os << '\n';
}
34 
35 } // end anonymous namespace
36 
37 Reader::IndentStackEntry::IndentStackEntry(std::size_t l, std::size_t s, std::size_t e):
38  line(l),start_length(s),end_length(e) {
39 }
40 
41 void Reader::at_token() {
42  bool done = false;
43  /* this can loop arbitrarily as reductions are made,
44  because they don't consume the token */
45  while (!done) {
46  const Action& parser_action = get_action(parser, parser_state, lexer_token);
47  if (parser_action.kind == ACTION_NONE) {
48  std::stringstream ss;
49  ss << "error: Parser failure at line " << line;
50  ss << " column " << column << " of " << stream_name << '\n';
51  ss << line_text << '\n';
52  print_indicator(ss, line_text, line_text.size());
53  std::set<std::string> expect_names;
54  for (int expect_token = 0;
55  expect_token < grammar->nterminals; ++expect_token) {
56  const Action& expect_action = get_action(parser, parser_state, expect_token);
57  if (expect_action.kind != ACTION_NONE) {
58  expect_names.insert(at(grammar->symbol_names, expect_token));
59  }
60  }
61  ss << "Expected one of {";
62  for (std::set<std::string>::iterator it = expect_names.begin();
63  it != expect_names.end(); ++it) {
64  if (it != expect_names.begin()) ss << ", ";
65  if (*it == ",") ss << "','";
66  else ss << *it;
67  }
68  ss << "}\n";
69  ss << "Got: " << at(grammar->symbol_names, lexer_token) << '\n';
70  ss << "Parser was in state " << parser_state << '\n';
71  throw ParserFail(ss.str());
72  } else if (parser_action.kind == ACTION_SHIFT) {
73  Teuchos::any shift_result;
74  this->at_shift(shift_result, lexer_token, lexer_text);
75  add_back(value_stack, shift_result);
76  done = true;
77  } else if (parser_action.kind == ACTION_REDUCE) {
78  if (parser_action.production == get_accept_production(*grammar)) {
79  did_accept = true;
80  return;
81  }
82  const Grammar::Production& prod = at(grammar->productions, parser_action.production);
83  reduction_rhs.clear();
84  for (int i = 0; i < size(prod.rhs); ++i) {
85  add_back(reduction_rhs, at(value_stack, size(value_stack) - size(prod.rhs) + i));
86  }
87  resize(value_stack, size(value_stack) - size(prod.rhs));
88  Teuchos::any reduce_result;
89  this->at_reduce(reduce_result, parser_action.production, reduction_rhs);
90  add_back(value_stack, reduce_result);
91  } else {
92  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
93  "SERIOUS BUG: Action::kind enum value not in range\n");
94  }
95  parser_state = execute_action(parser, parser_stack, parser_action);
96  }
97 }
98 
99 void Reader::indent_mismatch() {
100  TEUCHOS_ASSERT(!indent_stack.empty());
101  const IndentStackEntry& top = indent_stack.back();
102  std::stringstream ss;
103  ss << "error: Indentation characters beginning line " << line << " of " << stream_name
104  << " don't match those beginning line " << top.line << '\n';
105  ss << "It is strongly recommended not to mix tabs and spaces in indentation-sensitive formats\n";
106  throw ParserFail(ss.str());
107 }
108 
109 void Reader::at_token_indent() {
110  if (!sensing_indent || lexer_token != tables->indent_info.nodent_token) {
111  at_token();
112  return;
113  }
114  TEUCHOS_ASSERT(at(lexer_text, 0) == '\n');
115  std::string lexer_indent = lexer_text.substr(1, std::string::npos);
116  std::size_t minlen = std::min(lexer_indent.length(), indent_text.length());
117  if (lexer_indent.length() > indent_text.length()) {
118  if (0 != lexer_indent.compare(0, indent_text.length(), indent_text)) {
119  indent_mismatch();
120  }
121  indent_stack.push_back(IndentStackEntry(line, indent_text.length(), lexer_indent.length()));
122  indent_text = lexer_indent;
123  lexer_token = tables->indent_info.indent_token;
124  at_token();
125  } else if (lexer_indent.length() < indent_text.length()) {
126  if (0 != indent_text.compare(0, lexer_indent.length(), lexer_indent)) {
127  indent_mismatch();
128  }
129  bool first = true;
130  while (!indent_stack.empty()) {
131  const IndentStackEntry& top = indent_stack.back();
132  if (top.end_length <= minlen) break;
133  indent_stack.pop_back();
134  lexer_token = tables->indent_info.dedent_token;
135  at_token();
136  if (first) {
137  lexer_text.clear();
138  first = false;
139  }
140  }
141  if (first) lexer_text.clear();
142  indent_text = lexer_indent;
143  } else {
144  if (0 != lexer_indent.compare(indent_text)) {
145  indent_mismatch();
146  }
147  lexer_token = tables->indent_info.eqdent_token;
148  at_token();
149  }
150 }
151 
152 void Reader::backtrack_to_last_accept(std::istream& stream) {
153  /* all the last_accept and backtracking is driven by
154  the "accept the longest match" rule */
155  line = last_lexer_accept_line;
156  column = last_lexer_accept_column;
157  line_text = last_lexer_accept_line_text;
158  while (lexer_text.size() > last_lexer_accept) {
159  bool ok = !stream.unget().fail();
160  TEUCHOS_ASSERT(ok);
161  resize(lexer_text, size(lexer_text) - 1);
162  }
163 }
164 
165 void Reader::reset_lexer_state() {
166  lexer_state = 0;
167  lexer_text.clear();
168  lexer_token = -1;
169 }
170 
171 void Reader::at_lexer_end(std::istream& stream) {
172  if (lexer_token == -1) {
173  std::stringstream ss;
174  if (lexer_text.find('\n') == std::string::npos) {
175  ss << "error: Could not tokenize this (line " << line;
176  ss << " column " << column << " of " << stream_name << "):\n";
177  ss << line_text << '\n';
178  TEUCHOS_ASSERT(line_text.size() >= lexer_text.size());
179  print_underline(ss, line_text, line_text.size() - lexer_text.size(), line_text.size());
180  } else {
181  ss << "error: Could not tokenize this (ends at line " << line;
182  ss << " column " << column << " of " << stream_name << "):\n";
183  ss << lexer_text << '\n';
184  }
185  throw ParserFail(ss.str());
186  }
187  backtrack_to_last_accept(stream);
188  at_token_indent();
189  reset_lexer_state();
190 }
191 
193  tables(tables_in),
194  parser(tables->parser),
195  lexer(tables->lexer),
196  grammar(get_grammar(parser))
197 {
198  TEUCHOS_ASSERT(get_determinism(lexer));
199 }
200 
201 void Reader::update_position(char c) {
202  if (c == '\n') {
203  ++line;
204  column = 1;
205  line_text.clear();
206  } else {
207  ++column;
208  }
209 }
210 
211 void Reader::read_stream(any& result, std::istream& stream, std::string const& stream_name_in) {
212  using std::swap;
213  line = 1;
214  column = 1;
215  lexer_state = 0;
216  lexer_text.clear();
217  line_text.clear();
218  lexer_token = -1;
219  parser_state = 0;
220  parser_stack.clear();
221  parser_stack.push_back(parser_state);
222  value_stack.clear();
223  did_accept = false;
224  stream_name = stream_name_in;
225  if (tables->indent_info.is_sensitive) {
226  sensing_indent = true;
227  indent_text.clear();
228  indent_stack.clear();
229  /* pretend the stream starts with a newline so we can
230  detect an INDENT on the first line. don't update the
231  line/column pointers though. */
232  char c = '\n';
233  lexer_text.push_back(c);
234  int lexer_symbol = get_symbol(c);
235  lexer_state = step(lexer, lexer_state, lexer_symbol);
236  TEUCHOS_ASSERT(lexer_state != -1);
237  lexer_token = accepts(lexer, lexer_state);
238  TEUCHOS_ASSERT(lexer_token == tables->indent_info.nodent_token);
239  last_lexer_accept = lexer_text.size();
240  last_lexer_accept_line = 1;
241  last_lexer_accept_column = 1;
242  last_lexer_accept_line_text = line_text;
243  } else {
244  sensing_indent = false;
245  }
246  char c;
247  while (stream.get(c)) {
248  if (!is_symbol(c)) {
249  std::stringstream ss;
250  ss << "error: Unexpected character code " << int(c);
251  ss << " at line " << line << " column " << column;
252  ss << " of " << stream_name << '\n';
253  if (!line_text.empty()) {
254  ss << line_text << '\n';
255  print_indicator(ss, line_text, line_text.size());
256  }
257  throw ParserFail(ss.str());
258  }
259  line_text.push_back(c);
260  lexer_text.push_back(c);
261  int lexer_symbol = get_symbol(c);
262  lexer_state = step(lexer, lexer_state, lexer_symbol);
263  if (lexer_state == -1) {
264  at_lexer_end(stream);
265  } else {
266  int token = accepts(lexer, lexer_state);
267  update_position(c);
268  if (token != -1) {
269  lexer_token = token;
270  last_lexer_accept = lexer_text.size();
271  last_lexer_accept_line = line;
272  last_lexer_accept_column = column;
273  last_lexer_accept_line_text = line_text;
274  }
275  }
276  }
277  if (last_lexer_accept < lexer_text.size()) {
278  std::stringstream ss;
279  std::string bad_str = lexer_text.substr(last_lexer_accept, std::string::npos);
280  ss << "error: Could not tokenize \"" << bad_str;
281  ss << "\" at end of " << stream_name << '\n';
282  throw ParserFail(ss.str());
283  }
284  at_lexer_end(stream);
285  lexer_token = get_end_terminal(*grammar);
286  at_token();
287  TEUCHOS_TEST_FOR_EXCEPTION(!did_accept, std::logic_error,
288  "The EOF terminal was accepted but the root nonterminal was not reduced\n"
289  "This indicates a bug in Teuchos::Reader\n");
290  TEUCHOS_ASSERT(value_stack.size() == 1);
291  swap(result, value_stack.back());
292 }
293 
294 void Reader::read_string(any& result, std::string const& string, std::string const& string_name) {
295  std::istringstream stream(string);
296  read_stream(result, stream, string_name);
297 }
298 
299 void Reader::read_file(any& result, std::string const& file_name) {
300  std::ifstream stream(file_name.c_str());
301  read_stream(result, stream, file_name);
302 }
303 
304 void Reader::at_shift(any&, int, std::string&) {
305 }
306 
307 void Reader::at_reduce(any&, int, std::vector<any>&) {
308 }
309 
310 DebugReader::DebugReader(ReaderTablesPtr tables_in, std::ostream& os_in):
311  Reader(tables_in),os(os_in)
312 {
313 }
314 
315 void DebugReader::at_shift(any& result, int token, std::string& text) {
316  std::string& text_escaped = make_any_ref<std::string>(result);
317  for (std::size_t i = 0; i < text.size(); ++i) {
318  char c = text[i];
319  switch (c) {
320  case '\n': text_escaped.append("\\n"); break;
321  case '\t': text_escaped.append("\\t"); break;
322  case '\r': text_escaped.append("\\r"); break;
323  default: text_escaped.push_back(c);
324  }
325  }
326  os << "SHIFT (" << at(grammar->symbol_names, token) << ")[" << text_escaped << "]\n";
327 }
328 
329 void DebugReader::at_reduce(any& result, int prod_i, std::vector<any>& rhs) {
330  os << "REDUCE";
331  std::string& lhs_text = make_any_ref<std::string>(result);
332  const Grammar::Production& prod = at(grammar->productions, prod_i);
333  for (int i = 0; i < size(prod.rhs); ++i) {
334  const std::string& rhs_name = at(grammar->symbol_names, at(prod.rhs, i));
335  const std::string& rhs_text = any_ref_cast<std::string>(at(rhs, i));
336  os << " (" << rhs_name << ")[" << rhs_text << "]";
337  lhs_text.append(rhs_text);
338  }
339  const std::string& lhs_name = at(grammar->symbol_names, prod.lhs);
340  os << " -> (" << lhs_name << ")[" << lhs_text << "]\n";
341 }
342 
343 } // namespace Teuchos
Reader(ReaderTablesPtr tables_in)
Constructor: accepts an RCP to ReaderTables.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Tries to create LALR(1) parser tables for a given grammar.
void read_file(any &result, std::string const &file_name)
A convenience method for reading a file.
void read_string(any &result, std::string const &string, std::string const &string_name)
A convenience method for reading a string.
Modified boost::any class, which is a container for a templated value.
Definition: Teuchos_any.hpp:86
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
virtual void at_reduce(any &result, int production, std::vector< any > &rhs)
User-overridable REDUCE (production) method.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
virtual void at_shift(any &result, int token, std::string &text)
User-overridable SHIFT (token) method.
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
void read_stream(any &result, std::istream &stream, std::string const &stream_name_in)
The main method for reading a stream of text.
Declares Teuchos::Reader.