diff options
Diffstat (limited to 'antlr/libantlr3c-3.4/include/antlr3recognizersharedstate.h')
-rw-r--r-- | antlr/libantlr3c-3.4/include/antlr3recognizersharedstate.h | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/antlr/libantlr3c-3.4/include/antlr3recognizersharedstate.h b/antlr/libantlr3c-3.4/include/antlr3recognizersharedstate.h new file mode 100644 index 0000000..9e024d8 --- /dev/null +++ b/antlr/libantlr3c-3.4/include/antlr3recognizersharedstate.h @@ -0,0 +1,218 @@ +/** \file + * While the C runtime does not need to model the state of + * multiple lexers and parsers in the same way as the Java runtime does + * it is no overhead to reflect that model. In fact the + * C runtime has always been able to share recognizer state. + * + * This 'class' therefore defines all the elements of a recognizer + * (either lexer, parser or tree parser) that are needed to + * track the current recognition state. Multiple recognizers + * may then share this state, for instance when one grammar + * imports another. + */ + +#ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_H +#define _ANTLR3_RECOGNIZER_SHARED_STATE_H + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3defs.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** All the data elements required to track the current state + * of any recognizer (lexer, parser, tree parser). + * May be shared between multiple recognizers such that + * grammar inheritance is easily supported. + */ +typedef struct ANTLR3_RECOGNIZER_SHARED_STATE_struct +{ + /** If set to ANTLR3_TRUE then the recognizer has an exception + * condition (this is tested by the generated code for the rules of + * the grammar). + */ + ANTLR3_BOOLEAN error; + + /** Points to the first in a possible chain of exceptions that the + * recognizer has discovered. + */ + pANTLR3_EXCEPTION exception; + + /** Track around a hint from the creator of the recognizer as to how big this + * thing is going to get, as the actress said to the bishop. This allows us + * to tune hash tables accordingly. This might not be the best place for this + * in the end but we will see. + */ + ANTLR3_UINT32 sizeHint; + + /** Track the set of token types that can follow any rule invocation. + * Stack structure, to support: List<BitSet>. + */ + pANTLR3_STACK following; + + + /** This is true when we see an error and before having successfully + * matched a token. Prevents generation of more than one error message + * per error. + */ + ANTLR3_BOOLEAN errorRecovery; + + /** The index into the input stream where the last error occurred. 
+ * This is used to prevent infinite loops where an error is found + * but no token is consumed during recovery...another error is found, + * ad nauseam. This is a failsafe mechanism to guarantee that at least + * one token/tree node is consumed for two errors. + */ + ANTLR3_MARKER lastErrorIndex; + + /** In lieu of a return value, this indicates that a rule or token + * has failed to match. Reset to false upon valid token match. + */ + ANTLR3_BOOLEAN failed; + + /** When the recognizer terminates, the error handling functions + * will have incremented this value if any error occurred (that was displayed). It can then be + * used by the grammar programmer without having to use static globals. + */ + ANTLR3_UINT32 errorCount; + + /** If 0, no backtracking is going on. Safe to exec actions etc... + * If >0 then it's the level of backtracking. + */ + ANTLR3_INT32 backtracking; + + /** Integer trie (pANTLR3_INT_TRIE) used for rule memoizing; older wording called this an ANTLR3_VECTOR of ANTLR3_LIST, which does not match the declared type. + * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is + * the memoization table for ruleIndex. For key ruleStartIndex, you + * get back the stop token for associated rule or MEMO_RULE_FAILED. + * + * This is only used if rule memoization is on. + */ + pANTLR3_INT_TRIE ruleMemo; + + /** Pointer to an array of token names + * that are generally useful in error reporting. The generated parsers install + * this pointer. The table it points to is statically allocated as 8 bit ascii + * at parser compile time - grammar token names are thus restricted in character + * sets, which does not seem too terrible. + */ + pANTLR3_UINT8 * tokenNames; + + /** User programmable pointer that can be used for instance as a place to + * store some tracking structure specific to the grammar that would not normally + * be available to the error handling functions. + */ + void * userp; + + /** The goal of all lexer rules/methods is to create a token object. 
+ * This is an instance variable as multiple rules may collaborate to + * create a single token. For example, NUM : INT | FLOAT ; + * In this case, you want the INT or FLOAT rule to set token and not + * have it reset to a NUM token in rule NUM. + */ + pANTLR3_COMMON_TOKEN token; + + /** Since the goal of all lexer rules is to create a token, a lexer + * needs to build a token factory to create them. + */ + pANTLR3_TOKEN_FACTORY tokFactory; + + /** A lexer is a source of tokens, produced by all the generated (or + * hand crafted if you like) matching rules. As such it needs to provide + * a token source interface implementation. + */ + pANTLR3_TOKEN_SOURCE tokSource; + + /** The channel number for the current token + */ + ANTLR3_UINT32 channel; + + /** The token type for the current token + */ + ANTLR3_UINT32 type; + + /** The input line (where it makes sense) on which the first character of the current + * token resides. + */ + ANTLR3_INT32 tokenStartLine; + + /** The character position of the first character of the current token + * within the line specified by tokenStartLine + */ + ANTLR3_INT32 tokenStartCharPositionInLine; + + /** What character index in the stream did the current token start at? + * Needed, for example, to get the text for current token. Set at + * the start of nextToken. + */ + ANTLR3_MARKER tokenStartCharIndex; + + /** Text for the current token. This can be overridden by setting this + * variable directly or by using the SETTEXT() macro (preferred) in your + * lexer rules. + */ + pANTLR3_STRING text; + + /** User controlled variables that will be installed in a newly created + * token. + */ + ANTLR3_UINT32 user1, user2, user3; + void * custom; + + /** Input stream stack, which allows the C programmer to switch input streams + * easily and allow the standard nextToken() implementation to deal with it + * as this is a common requirement. 
+ */ + pANTLR3_STACK streams; + + /// A stack of token/tree rewrite streams that are available for use + /// by a parser or tree parser that is using rewrites to generate + /// an AST. This saves each rule in the recognizer from having to + /// allocate and deallocate rewrite streams on entry and exit. As + /// the parser recurses through the rules it will reach a steady state + /// of the maximum number of allocated streams, which instead of + /// deallocating them at rule exit, it will place on this stack for + /// reuse. The streams are then all finally freed when this stack + /// is freed. + /// + pANTLR3_VECTOR rStreams; + +} + ANTLR3_RECOGNIZER_SHARED_STATE; + +#ifdef __cplusplus +} +#endif + +#endif + + |