1 #include "Teuchos_set.hpp" 3 #include "Teuchos_regex.hpp" 8 #include "Teuchos_Assert.hpp" 10 #include "Teuchos_vector.hpp" 11 #include "Teuchos_string.hpp" 12 #include "Teuchos_chartab.hpp" 14 #include "Teuchos_chartab.hpp" 19 Language make_language() {
// Production table of the regular-expression grammar. Each entry names its
// left-hand-side nonterminal in parentheses; the symbols after >> form the
// right-hand side.
// A "regex" is a single "union".
30 prods[PROD_REGEX](
"regex") >>
"union";
// "union": either one "concat", or a "union" followed by "|" and a "concat".
31 prods[PROD_UNION_DECAY](
"union") >>
"concat";
32 prods[PROD_UNION](
"union") >>
"union",
"|",
"concat";
// "concat": either one "qualified", or a "concat" followed by a "qualified".
33 prods[PROD_CONCAT_DECAY](
"concat") >>
"qualified";
34 prods[PROD_CONCAT](
"concat") >>
"concat",
"qualified";
// "qualified": a "single", or a "qualified" followed by one of the postfix
// symbols "*", "+" or "?".
35 prods[PROD_QUAL_DECAY](
"qualified") >>
"single";
36 prods[PROD_STAR](
"qualified") >>
"qualified",
"*";
37 prods[PROD_PLUS](
"qualified") >>
"qualified",
"+";
38 prods[PROD_MAYBE](
"qualified") >>
"qualified",
"?";
// "single": a "char", the "." symbol, a "set", or a parenthesized "union".
39 prods[PROD_SINGLE_CHAR](
"single") >>
"char";
40 prods[PROD_ANY](
"single") >>
".";
41 prods[PROD_SINGLE_SET](
"single") >>
"set";
42 prods[PROD_PARENS_UNION](
"single") >>
"(",
"union",
")";
// "set": either a "positive-set" ("[" items "]") or a "negative-set"
// ("[" "^" items "]").
43 prods[PROD_SET_POSITIVE](
"set") >>
"positive-set";
44 prods[PROD_SET_NEGATIVE](
"set") >>
"negative-set";
45 prods[PROD_POSITIVE_SET](
"positive-set") >>
"[",
"set-items",
"]";
46 prods[PROD_NEGATIVE_SET](
"negative-set") >>
"[",
"^",
"set-items",
"]";
// "set-items": one "set-item", or a "set-items" followed by another "set-item".
47 prods[PROD_SET_ITEMS_DECAY](
"set-items") >>
"set-item";
48 prods[PROD_SET_ITEMS_ADD](
"set-items") >>
"set-items",
"set-item";
// "set-item": a single "char" or a "range".
49 prods[PROD_SET_ITEM_CHAR](
"set-item") >>
"char";
50 prods[PROD_SET_ITEM_RANGE](
"set-item") >>
"range";
// "range": two "char" symbols separated by "-".
51 prods[PROD_RANGE](
"range") >>
"char",
"-",
"char";
52 out.tokens.resize(NTOKS);
54 out.tokens[TOK_CHAR](
"char",
"[^\\\\\\.\\[\\]\\(\\)\\|\\-\\^\\*\\+\\?]|\\\\.");
55 out.tokens[TOK_DOT](
".",
"\\.");
56 out.tokens[TOK_LRANGE](
"[",
"\\]");
57 out.tokens[TOK_RRANGE](
"]",
"\\]");
58 out.tokens[TOK_LPAREN](
"(",
"\\(");
59 out.tokens[TOK_RPAREN](
")",
"\\)");
60 out.tokens[TOK_UNION](
"|",
"\\|");
61 out.tokens[TOK_RANGE](
"-",
"\\-");
62 out.tokens[TOK_NEGATE](
"^",
"\\^");
63 out.tokens[TOK_STAR](
"*",
"\\*");
64 out.tokens[TOK_PLUS](
"+",
"\\+");
65 out.tokens[TOK_MAYBE](
"?",
"\\?");
// Builds the lexer automaton for regular-expression text into `result`
// directly from automaton primitives.
// NOTE(review): the function header and several lines of this body are not
// visible in this excerpt.
// Characters that receive a dedicated token each (see the loop further down).
72 std::string meta_chars_str =
".[]()|-^*+?";
// all_chars: every integer in [0, NCHARS).
73 std::set<int> all_chars;
74 for (
int i = 0; i < NCHARS; ++i) all_chars.insert(i);
// nonmeta_chars: all_chars minus the symbol of each metacharacter.
75 std::set<int> nonmeta_chars = all_chars;
76 for (
int i = 0; i < size(meta_chars_str); ++i) {
77 int meta_char = at(meta_chars_str, i);
// NOTE(review): the iterator is erased without comparing it to end();
// this relies on get_symbol() always yielding a value in [0, NCHARS) -- confirm.
78 std::set<int>::iterator it = nonmeta_chars.find(get_symbol(meta_char));
79 nonmeta_chars.erase(it);
// lex_nonmeta: built from nonmeta_chars with TOK_CHAR passed as the token.
81 FiniteAutomaton lex_nonmeta;
82 make_set_nfa(lex_nonmeta, NCHARS, nonmeta_chars, TOK_CHAR);
// lex_escaped: the backslash automaton concatenated with the any-character
// automaton, again with TOK_CHAR passed as the token.
83 FiniteAutomaton lex_slash;
84 make_char_single_nfa(lex_slash,
'\\');
85 FiniteAutomaton lex_any;
86 make_set_nfa(lex_any, NCHARS, all_chars);
87 FiniteAutomaton lex_escaped;
88 concat(lex_escaped, lex_slash, lex_any, TOK_CHAR);
// lex_char: union of the two TOK_CHAR automata above.
89 FiniteAutomaton lex_char;
90 unite(lex_char, lex_nonmeta, lex_escaped);
// lex_metachars: accumulates one single-character automaton per metacharacter.
91 FiniteAutomaton lex_metachars;
92 for (
int i = 0; i < size(meta_chars_str); ++i) {
// The i-th metacharacter gets token TOK_CHAR + i + 1; presumably the order of
// meta_chars_str mirrors the token enumeration -- confirm against the enum.
93 int token = TOK_CHAR + i + 1;
// NOTE(review): two construction paths follow (temporary automaton united
// into lex_metachars, and a direct build into lex_metachars); the statements
// choosing between them are not visible in this excerpt.
95 FiniteAutomaton lex_metachar;
96 make_char_single_nfa(lex_metachar, at(meta_chars_str, i), token);
97 unite(lex_metachars, lex_metachars, lex_metachar);
99 make_char_single_nfa(lex_metachars, at(meta_chars_str, i), token);
// Combine the metacharacter and character automata into result, then run
// make_deterministic and simplify with result as both output and input.
102 unite(result, lex_metachars, lex_char);
103 make_deterministic(result, result);
104 simplify(result, result);
// Runs only while ptr holds no strong reference: allocates a new ReaderTables.
// NOTE(review): the enclosing function header, the declarations of ptr and
// lang, and the lines that use `grammar` and publish `newptr` are not visible
// in this excerpt.
109 if (ptr.strong_count() == 0) {
110 RCP<ReaderTables> newptr(
new ReaderTables());
// Grammar derived from the language that lang refers to.
112 GrammarPtr grammar = make_grammar(*lang);
// The lexer is filled in by regex::make_lexer.
114 regex::make_lexer(newptr->lexer);
// Indentation handling is switched off; both indent-related tokens are set to -1.
115 newptr->indent_info.is_sensitive =
false;
116 newptr->indent_info.indent_token = -1;
117 newptr->indent_info.dedent_token = -1;
// Runs only while ptr holds no strong reference: resets ptr to a newly
// allocated copy of the Language returned by make_language().
// NOTE(review): the enclosing function header and the declaration of ptr are
// not visible in this excerpt.
125 if (ptr.strong_count() == 0) {
126 ptr.reset(
new Language(make_language()));
// Builds into `result` the automaton for the regular expression `regex`.
//   result - receives the automaton (swapped in from the parse result).
//   name   - token name; used as the stream name for the reader and in the
//            error message.
//   regex  - regular-expression text to parse.
//   token  - handed to regex::Reader's constructor.
// Throws ParserFail carrying the collected diagnostic text on the failure path.
// NOTE(review): several lines of this body (including the declaration of
// result_any and the try/catch structure) are not visible in this excerpt.
131 void make_dfa(FiniteAutomaton& result, std::string
const& name, std::string
const& regex,
int token) {
// The four indentation pseudo-patterns are handled by a recursive call with
// the pattern: optional carriage return, newline, any run of spaces or tabs.
134 if (regex ==
"]INDENT[" || regex ==
"]DEDENT[" || regex ==
"]EQDENT[" || regex ==
"]NODENT[") {
135 make_dfa(result, name,
"\r?\n[ \t]*", token);
// Parse the regex text; the reader leaves its result in result_any.
138 regex::Reader reader(token);
141 reader.read_string(result_any, regex, name);
// Failure path: start the message with e.what() (e is presumably the caught
// exception), add the token name and regex, repeat the parse with a
// DebugReader that writes into the same stream, then throw ParserFail with
// everything collected.
143 std::stringstream ss;
144 ss << e.what() <<
'\n';
145 ss <<
"error: couldn't build DFA for token \"" << name <<
"\" regex \"" << regex <<
"\"\n";
146 ss <<
"repeating with DebugReader:\n";
147 DebugReader debug_reader(regex::ask_reader_tables(), ss);
148 debug_reader.read_string(result_any, regex, name);
149 throw ParserFail(ss.str());
// Exchange the FiniteAutomaton held in result_any with result.
151 swap(any_ref_cast<FiniteAutomaton>(result_any), result);
// Constructs the regex reader: the Teuchos::Reader base is initialized with
// the tables returned by regex::ask_reader_tables(), and result_token_in is
// stored in result_token for later use by the reduce callback.
154 regex::Reader::Reader(
int result_token_in):
155 Teuchos::Reader(regex::ask_reader_tables()),
156 result_token(result_token_in) {
// Shift callback. Only TOK_CHAR tokens are processed; any other token returns
// immediately, leaving `result` untouched.
//   result - output value for the shifted token.
//   token  - index of the shifted token.
//   text   - text of the shifted token.
159 void regex::Reader::at_shift(any& result,
int token, std::string& text) {
160 if (token != TOK_CHAR)
return;
// The token text is handled by length: one character, two characters, or
// otherwise the "BUG" message below.
// NOTE(review): the bodies of both branches and the statement that emits the
// message are not visible in this excerpt; presumably the two-character case
// is the escape slash plus the escaped character -- confirm.
161 if (size(text) == 1) {
163 }
else if (size(text) == 2) {
168 "BUG: regex char text is \"" << text <<
"\"\n");
// Reduce callback: dispatches on the production index and stores the value of
// the left-hand side in result_any.
//   result_any - output value for the reduced production.
//   production - index of the production being reduced.
//   rhs        - values of the right-hand-side symbols, in order.
// NOTE(review): many case labels, break statements and a few statements are
// not visible in this excerpt; guesses about them below are marked as such.
172 void regex::Reader::at_reduce(any& result_any,
int production, std::vector<any>& rhs) {
174 switch (production) {
// First visible case (label not shown, presumably PROD_REGEX): take rhs[0] as
// the result, then make it deterministic and simplify it.
176 swap(result_any, at(rhs, 0));
177 FiniteAutomaton& result = any_ref_cast<FiniteAutomaton>(result_any);
178 make_deterministic(result, result);
179 simplify(result, result);
// These productions forward their only relevant operand, rhs[0], unchanged.
182 case PROD_UNION_DECAY:
183 case PROD_CONCAT_DECAY:
184 case PROD_QUAL_DECAY:
185 case PROD_SET_ITEMS_DECAY:
186 case PROD_SET_ITEM_RANGE: {
187 swap(result_any, at(rhs, 0));
// Operands at rhs[0] and rhs[2], skipping the symbol between them (label and
// combining call not shown; presumably PROD_UNION).
191 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
192 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
193 FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 2));
// Concatenation of rhs[0] and rhs[1], with result_token passed along.
198 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
199 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
200 FiniteAutomaton& b = any_ref_cast<FiniteAutomaton>(at(rhs, 1));
201 concat(result, a, b, result_token);
// star() applied to rhs[0].
205 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
206 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
207 star(result, a, result_token);
// plus() applied to rhs[0].
211 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
212 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
213 plus(result, a, result_token);
// maybe() applied to rhs[0].
217 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
218 FiniteAutomaton& a = any_ref_cast<FiniteAutomaton>(at(rhs, 0));
219 maybe(result, a, result_token);
// A single character becomes a one-character automaton.
222 case PROD_SINGLE_CHAR: {
223 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
224 char c = any_cast<
char>(at(rhs, 0));
225 make_char_single_nfa(result, c, result_token);
// Range automaton over 0 .. NCHARS - 1 (label not shown, presumably PROD_ANY).
229 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
230 make_range_nfa(result, NCHARS, 0, NCHARS - 1, result_token);
// A character set becomes an automaton via make_char_set_nfa.
233 case PROD_SINGLE_SET: {
234 FiniteAutomaton& result = make_any_ref<FiniteAutomaton>(result_any);
235 std::set<char>& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
236 make_char_set_nfa(result, charset, result_token);
// Parentheses: the result is the middle operand, rhs[1].
239 case PROD_PARENS_UNION: {
240 swap(result_any, at(rhs, 1));
// A positive set is forwarded as is.
243 case PROD_SET_POSITIVE: {
244 swap(result_any, at(rhs, 0));
// A negative set is turned into a new set by negate_set.
247 case PROD_SET_NEGATIVE: {
248 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
249 std::set<char>
const& charset = any_ref_cast<std::set<char> >(at(rhs, 0));
250 negate_set(result, charset);
// "[" items "]": the items are rhs[1].
253 case PROD_POSITIVE_SET: {
254 swap(result_any, at(rhs, 1));
// "[" "^" items "]": the items are rhs[2].
257 case PROD_NEGATIVE_SET: {
258 swap(result_any, at(rhs, 2));
// Appending a set item: b is merged into result with unite_with.
// NOTE(review): the statement that uses `a` is not visible in this excerpt.
261 case PROD_SET_ITEMS_ADD: {
262 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
263 std::set<char>& a = any_ref_cast<std::set<char> >(at(rhs, 0));
264 std::set<char>
const& b = any_ref_cast<std::set<char> >(at(rhs, 1));
266 unite_with(result, b);
// A set item made of one character (the statement filling `result` is not shown).
269 case PROD_SET_ITEM_CHAR: {
270 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
271 char c = any_cast<
char>(at(rhs, 0));
// Character range: endpoints are rhs[0] and rhs[2]; the loop visits every
// char from a through b inclusive (label not shown, presumably PROD_RANGE).
// NOTE(review): if b is the largest value a char can hold, `c <= b` can never
// become false and the loop would not terminate -- confirm that inputs are
// bounded below that value.
276 std::set<char>& result = make_any_ref<std::set<char> >(result_any);
277 char a = any_cast<
char>(at(rhs, 0));
278 char b = any_cast<
char>(at(rhs, 2));
279 for (
char c = a; c <= b; ++c) {
// Message for a production index that none of the cases handles.
286 "BUG: unexpected production " << production <<
'\n');
Parser make_lalr1_parser(GrammarPtr grammar, bool verbose)
Tries to create LALR(1) parser tables for a given grammar.
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Tries to create LALR(1) parser tables for a given grammar.
Productions productions
vector of productions
Declares Teuchos::Parser, ParserFail and make_lalr1_parser.
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
RCP< const ReaderTables > ReaderTablesPtr
an RCP to a const ReaderTables
void make_lexer(FiniteAutomaton &result, Language const &language)
construct a lexer for the Language tokens.
RCP< const Language > LanguagePtr
an RCP to a const Language
#define TEUCHOS_ASSERT(assertion_test)
This macro throws when an assert fails.
Declares Teuchos::Reader.