Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_Language.cpp
1 #include "Teuchos_Language.hpp"
2 
3 #include <set>
4 #include <iostream>
5 #include <sstream>
6 #include <cstdlib>
7 #include <cstdarg>
8 
9 #include "Teuchos_vector.hpp"
10 #include "Teuchos_regex.hpp"
11 #include "Teuchos_Parser.hpp"
12 
13 namespace Teuchos {
14 
15 void Language::Token::operator()(std::string const& name_in, std::string const& regex_in) {
16  name = name_in;
17  regex = regex_in;
18 }
19 
20 Language::RHSBuilder::RHSBuilder(Production& prod_in):
21  prod(prod_in) {
22 }
23 
24 Language::RHSBuilder& Language::RHSBuilder::operator,(std::string const& rhs_item) {
25  prod.rhs.push_back(rhs_item);
26  return *this;
27 }
28 
29 Language::RHSBuilder& Language::RHSBuilder::operator>>(std::string const& rhs_item) {
30  prod.rhs.push_back(rhs_item);
31  return *this;
32 }
33 
34 Language::RHSBuilder Language::Production::operator()(std::string const& lhs_in) {
35  lhs = lhs_in;
36  return Language::RHSBuilder(*this);
37 }
38 
39 GrammarPtr make_grammar(Language const& language) {
40  std::map<std::string, int> symbol_map;
41  int nterminals = 0;
42  for (Language::Tokens::const_iterator it = language.tokens.begin();
43  it != language.tokens.end(); ++it) {
44  const Language::Token& token = *it;
45  symbol_map[token.name] = nterminals++;
46  }
47  int nsymbols = nterminals;
48  for (Language::Productions::const_iterator it = language.productions.begin();
49  it != language.productions.end(); ++it) {
50  const Language::Production& production = *it;
51  if (symbol_map.count(production.lhs)) continue;
52  symbol_map[production.lhs] = nsymbols++;
53  }
54  RCP<Grammar> out(new Grammar());
55  out->nsymbols = nsymbols;
56  out->nterminals = nterminals;
57  for (Language::Productions::const_iterator it = language.productions.begin();
58  it != language.productions.end(); ++it) {
59  const Language::Production& lang_prod = *it;
60  out->productions.push_back(Grammar::Production());
61  Grammar::Production& gprod = out->productions.back();
62  TEUCHOS_ASSERT(symbol_map.count(lang_prod.lhs));
63  gprod.lhs = symbol_map[lang_prod.lhs];
64  for (Language::RHS::const_iterator it2 = lang_prod.rhs.begin();
65  it2 != lang_prod.rhs.end(); ++it2) {
66  const std::string& lang_symb = *it2;
67  TEUCHOS_TEST_FOR_EXCEPTION(!symbol_map.count(lang_symb), ParserFail,
68  "RHS entry \"" << lang_symb <<
69  "\" is neither a nonterminal (LHS of a production) nor a token!\n");
70  gprod.rhs.push_back(symbol_map[lang_symb]);
71  }
72  }
73  out->symbol_names = make_vector<std::string>(nsymbols);
74  for (std::map<std::string, int>::const_iterator it = symbol_map.begin();
75  it != symbol_map.end(); ++it) {
76  const std::pair<std::string, int>& pair = *it;
77  at(out->symbol_names, pair.second) = pair.first;
78  }
79  add_end_terminal(*out);
80  add_accept_production(*out);
81  return out;
82 }
83 
84 std::ostream& operator<<(std::ostream& os, Language const& lang) {
85  for (Language::Tokens::const_iterator it = lang.tokens.begin();
86  it != lang.tokens.end(); ++it) {
87  const Language::Token& token = *it;
88  os << "token " << token.name << " regex \'" << token.regex << "\'\n";
89  }
90  std::set<std::string> nonterminal_set;
91  std::vector<std::string> nonterminal_list;
92  for (Language::Productions::const_iterator it = lang.productions.begin();
93  it != lang.productions.end(); ++it) {
94  const Language::Production& prod = *it;
95  if (!nonterminal_set.count(prod.lhs)) {
96  nonterminal_set.insert(prod.lhs);
97  nonterminal_list.push_back(prod.lhs);
98  }
99  }
100  for (std::vector<std::string>::const_iterator it = nonterminal_list.begin();
101  it != nonterminal_list.end(); ++it) {
102  const std::string& nonterminal = *it;
103  std::stringstream ss;
104  ss << nonterminal << " ::=";
105  std::string lead = ss.str();
106  os << lead;
107  for (std::string::iterator it2 = lead.begin(); it2 != lead.end(); ++it2) {
108  *it2 = ' ';
109  }
110  bool first = true;
111  for (Language::Productions::const_iterator it2 = lang.productions.begin();
112  it2 != lang.productions.end(); ++it2) {
113  const Language::Production& prod = *it2;
114  if (prod.lhs != nonterminal) continue;
115  if (first) first = false;
116  else os << " |\n" << lead;
117  for (Language::RHS::const_iterator it3 = prod.rhs.begin();
118  it3 != prod.rhs.end(); ++it3) {
119  const std::string& symb = *it3;
120  if (symb == "|") os << " '|'";
121  else os << " " << symb;
122  }
123  }
124  os << "\n";
125  }
126  os << "\n";
127  return os;
128 }
129 
130 void make_lexer(FiniteAutomaton& result, Language const& language) {
131  using std::swap;
132  for (int i = 0; i < size(language.tokens); ++i) {
133  const std::string& name = at(language.tokens, i).name;
134  const std::string& regex = at(language.tokens, i).regex;
135  if (i == 0) {
136  regex::make_dfa(result, name, regex, i);
137  } else {
138  FiniteAutomaton b;
139  regex::make_dfa(b, name, regex, i);
140  unite(result, result, b);
141  }
142  }
143  make_deterministic(result, result);
144  simplify(result, result);
145 }
146 
147 static void make_indent_info(IndentInfo& out, Language const& language) {
148  out.is_sensitive = false;
149  out.indent_token = -1;
150  out.dedent_token = -1;
151  out.eqdent_token = -1;
152  out.nodent_token = -1;
153  for (int tok_i = 0; tok_i < size(language.tokens); ++tok_i) {
154  const Language::Token& token = at(language.tokens, tok_i);
155  if (token.regex == "]INDENT[") {
156  TEUCHOS_TEST_FOR_EXCEPTION(out.indent_token != -1, ParserFail,
157  "error: Language has two or more ]INDENT[ tokens\n");
158  out.indent_token = tok_i;
159  out.is_sensitive = true;
160  } else if (token.regex == "]DEDENT[") {
161  TEUCHOS_TEST_FOR_EXCEPTION(out.dedent_token != -1, ParserFail,
162  "error: Language has two or more ]DEDENT[ tokens\n");
163  out.dedent_token = tok_i;
164  } else if (token.regex == "]EQDENT[") {
165  TEUCHOS_TEST_FOR_EXCEPTION(out.eqdent_token != -1, ParserFail,
166  "error: Language has two or more ]EQDENT[ tokens\n");
167  out.eqdent_token = tok_i;
168  } else if (token.regex == "]NODENT[") {
169  TEUCHOS_TEST_FOR_EXCEPTION(out.nodent_token != -1, ParserFail,
170  "error: Language has two or more ]NODENT[ tokens\n");
171  out.nodent_token = tok_i;
172  }
173  }
174  TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.indent_token == -1,
175  ParserFail,
176  "error: Indentation-sensitive language has no ]INDENT[ token\n");
177  TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.dedent_token == -1,
178  ParserFail,
179  "error: Indentation-sensitive language has no ]DEDENT[ token\n");
180  TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.eqdent_token == -1,
181  ParserFail,
182  "error: Indentation-sensitive language has no ]EQDENT[ token\n");
183  TEUCHOS_TEST_FOR_EXCEPTION(out.is_sensitive && out.nodent_token == -1,
184  ParserFail,
185  "error: Indentation-sensitive language has no ]NODENT[ token\n");
187  (out.indent_token < out.nodent_token ||
188  out.dedent_token < out.nodent_token ||
189  out.eqdent_token < out.nodent_token),
190  ParserFail,
191  "error: ]NODENT[ needs to come before all other indent tokens\n");
192 }
193 
195  RCP<ReaderTables> out(new ReaderTables());
196  make_lexer(out->lexer, language);
197  make_indent_info(out->indent_info, language);
198  GrammarPtr grammar = make_grammar(language);
199  out->parser = make_lalr1_parser(grammar);
200  return out;
201 }
202 
203 }
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Tries to create LALR(1) parser tables for a given grammar.
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
The main class for users to define a language using TeuchosParser.
Parser and lexer tables specifying how to read a Language.
ReaderTablesPtr make_reader_tables(Language const &language)
constructs ReaderTables for the given Language.
Tokens tokens
vector of tokens
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
void make_lexer(FiniteAutomaton &result, Language const &language)
construct a lexer for the Language tokens.
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
Declares Teuchos::Language.