9 #include "Teuchos_vector.hpp" 10 #include "Teuchos_regex.hpp" 15 void Language::Token::operator()(std::string
const& name_in, std::string
const& regex_in) {
20 Language::RHSBuilder::RHSBuilder(Production& prod_in):
24 Language::RHSBuilder& Language::RHSBuilder::operator,(std::string
const& rhs_item) {
25 prod.rhs.push_back(rhs_item);
29 Language::RHSBuilder& Language::RHSBuilder::operator>>(std::string
const& rhs_item) {
30 prod.rhs.push_back(rhs_item);
34 Language::RHSBuilder Language::Production::operator()(std::string
const& lhs_in) {
36 return Language::RHSBuilder(*
this);
39 GrammarPtr make_grammar(Language
const& language) {
40 std::map<std::string, int> symbol_map;
42 for (Language::Tokens::const_iterator it = language.tokens.begin();
43 it != language.tokens.end(); ++it) {
44 const Language::Token& token = *it;
45 symbol_map[token.name] = nterminals++;
47 int nsymbols = nterminals;
48 for (Language::Productions::const_iterator it = language.productions.begin();
49 it != language.productions.end(); ++it) {
50 const Language::Production& production = *it;
51 if (symbol_map.count(production.lhs))
continue;
52 symbol_map[production.lhs] = nsymbols++;
54 RCP<Grammar> out(
new Grammar());
55 out->nsymbols = nsymbols;
56 out->nterminals = nterminals;
57 for (Language::Productions::const_iterator it = language.productions.begin();
58 it != language.productions.end(); ++it) {
59 const Language::Production& lang_prod = *it;
60 out->productions.push_back(Grammar::Production());
61 Grammar::Production& gprod = out->productions.back();
63 gprod.lhs = symbol_map[lang_prod.lhs];
64 for (Language::RHS::const_iterator it2 = lang_prod.rhs.begin();
65 it2 != lang_prod.rhs.end(); ++it2) {
66 const std::string& lang_symb = *it2;
68 "RHS entry \"" << lang_symb <<
69 "\" is neither a nonterminal (LHS of a production) nor a token!\n");
70 gprod.rhs.push_back(symbol_map[lang_symb]);
73 out->symbol_names = make_vector<std::string>(nsymbols);
74 for (std::map<std::string, int>::const_iterator it = symbol_map.begin();
75 it != symbol_map.end(); ++it) {
76 const std::pair<std::string, int>& pair = *it;
77 at(out->symbol_names, pair.second) = pair.first;
79 add_end_terminal(*out);
80 add_accept_production(*out);
84 std::ostream& operator<<(std::ostream& os, Language
const& lang) {
85 for (Language::Tokens::const_iterator it = lang.tokens.begin();
86 it != lang.tokens.end(); ++it) {
87 const Language::Token& token = *it;
88 os <<
"token " << token.name <<
" regex \'" << token.regex <<
"\'\n";
90 std::set<std::string> nonterminal_set;
91 std::vector<std::string> nonterminal_list;
92 for (Language::Productions::const_iterator it = lang.productions.begin();
93 it != lang.productions.end(); ++it) {
94 const Language::Production& prod = *it;
95 if (!nonterminal_set.count(prod.lhs)) {
96 nonterminal_set.insert(prod.lhs);
97 nonterminal_list.push_back(prod.lhs);
100 for (std::vector<std::string>::const_iterator it = nonterminal_list.begin();
101 it != nonterminal_list.end(); ++it) {
102 const std::string& nonterminal = *it;
103 std::stringstream ss;
104 ss << nonterminal <<
" ::=";
105 std::string lead = ss.str();
107 for (std::string::iterator it2 = lead.begin(); it2 != lead.end(); ++it2) {
111 for (Language::Productions::const_iterator it2 = lang.productions.begin();
112 it2 != lang.productions.end(); ++it2) {
113 const Language::Production& prod = *it2;
114 if (prod.lhs != nonterminal)
continue;
115 if (first) first =
false;
116 else os <<
" |\n" << lead;
117 for (Language::RHS::const_iterator it3 = prod.rhs.begin();
118 it3 != prod.rhs.end(); ++it3) {
119 const std::string& symb = *it3;
120 if (symb ==
"|") os <<
" '|'";
121 else os <<
" " << symb;
132 for (
int i = 0; i < size(language.
tokens); ++i) {
133 const std::string& name = at(language.
tokens, i).name;
134 const std::string& regex = at(language.
tokens, i).regex;
136 regex::make_dfa(result, name, regex, i);
139 regex::make_dfa(b, name, regex, i);
140 unite(result, result, b);
143 make_deterministic(result, result);
144 simplify(result, result);
147 static void make_indent_info(IndentInfo& out,
Language const& language) {
148 out.is_sensitive =
false;
149 out.indent_token = -1;
150 out.dedent_token = -1;
151 out.eqdent_token = -1;
152 out.nodent_token = -1;
153 for (
int tok_i = 0; tok_i < size(language.
tokens); ++tok_i) {
154 const Language::Token& token = at(language.
tokens, tok_i);
155 if (token.regex ==
"]INDENT[") {
157 "error: Language has two or more ]INDENT[ tokens\n");
158 out.indent_token = tok_i;
159 out.is_sensitive =
true;
160 }
else if (token.regex ==
"]DEDENT[") {
162 "error: Language has two or more ]DEDENT[ tokens\n");
163 out.dedent_token = tok_i;
164 }
else if (token.regex ==
"]EQDENT[") {
166 "error: Language has two or more ]EQDENT[ tokens\n");
167 out.eqdent_token = tok_i;
168 }
else if (token.regex ==
"]NODENT[") {
170 "error: Language has two or more ]NODENT[ tokens\n");
171 out.nodent_token = tok_i;
176 "error: Indentation-sensitive language has no ]INDENT[ token\n");
179 "error: Indentation-sensitive language has no ]DEDENT[ token\n");
182 "error: Indentation-sensitive language has no ]EQDENT[ token\n");
185 "error: Indentation-sensitive language has no ]NODENT[ token\n");
187 (out.indent_token < out.nodent_token ||
188 out.dedent_token < out.nodent_token ||
189 out.eqdent_token < out.nodent_token),
191 "error: ]NODENT[ needs to come before all other indent tokens\n");
197 make_indent_info(out->indent_info, language);
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Tries to create LALR(1) parser tables for a given grammar.
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
The main class for users to define a language using TeuchosParser.
Parser and lexer tables specifying how to read a Language.
ReaderTablesPtr make_reader_tables(Language const &language)
constructs ReaderTables for the given Language.
Tokens tokens
vector of tokens
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
void make_lexer(FiniteAutomaton &result, Language const &language)
Constructs a lexer for the Language tokens.
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
Declares Teuchos::Language.