summaryrefslogtreecommitdiff
path: root/antlr/libantlr3c-3.4/include/antlr3baserecognizer.h
blob: 0a269d4c3eb838a38b9ca376a2d6e290cbf6360a (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
/** \file
 * Defines the basic structure to support recognizing by either a lexer,
 * parser, or tree parser.
 * \addtogroup ANTLR3_BASE_RECOGNIZER
 * @{
 */
#ifndef	_ANTLR3_BASERECOGNIZER_H
#define	_ANTLR3_BASERECOGNIZER_H

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include    <antlr3defs.h>
#include    <antlr3exception.h>
#include    <antlr3input.h>
#include    <antlr3tokenstream.h>
#include    <antlr3commontoken.h>
#include    <antlr3commontreenodestream.h>
#include	<antlr3debugeventlistener.h>
#include	<antlr3recognizersharedstate.h>

/** Type indicator for a lexer recognizer
 */
#define	    ANTLR3_TYPE_LEXER		0x0001

/** Type indicator for a parser recognizer
 */
#define	    ANTLR3_TYPE_PARSER		0x0002

/** Type indicator for a tree parser recognizer
 */
#define	    ANTLR3_TYPE_TREE_PARSER	0x0004

#ifdef __cplusplus
extern "C" {
#endif

/** \brief Base tracking context structure for all types of
 * recognizers.
 */
typedef	struct ANTLR3_BASE_RECOGNIZER_struct
{
    /// Whatever super structure is providing this interface needs a pointer to itself
    /// so that this can be passed back to it whenever the api functions
    /// are called back from here.
    ///
    void	      * super;
    
	/// Indicates the type of recognizer that we are an instance of.
    /// The programmer may set this to anything of course, but the default 
    /// implementations of the interface only really understand the built in
    /// types, so new error handlers etc would probably be required to as well.
    /// 
    ///  Valid types are:
    ///
    ///   - #ANTLR3_TYPE_LEXER  
	///	  - #ANTLR3_TYPE_PARSER
    ///   - #ANTLR3_TYPE_TREE_PARSER
    ///
    ANTLR3_UINT32	type;

	/// A pointer to the shared recognizer state, such that multiple
	/// recognizers can use the same inputs streams and so on (in
	/// the case of grammar inheritance for instance.
	///
	pANTLR3_RECOGNIZER_SHARED_STATE	state;

	/// If set to something other than NULL, then this structure is
	/// points to an instance of the debugger interface. In general, the
	/// debugger is only referenced internally in recovery/error operations
	/// so that it does not cause overhead by having to check this pointer
	/// in every function/method
	///
	pANTLR3_DEBUG_EVENT_LISTENER	debugger;


    /// Pointer to a function that matches the current input symbol
    /// against the supplied type. the function causes an error if a
    /// match is not found and the default implementation will also
    /// attempt to perform one token insertion or deletion if that is
    /// possible with the input stream. You can override the default
    /// implementation by installing a pointer to your own function
    /// in this interface after the recognizer has initialized. This can
    /// perform different recovery options or not recover at all and so on.
    /// To ignore recovery altogether, see the comments in the default
    /// implementation of this function in antlr3baserecognizer.c
    ///
    /// Note that errors are signalled by setting the error flag below
    /// and creating a new exception structure and installing it in the
    /// exception pointer below (you can chain these if you like and handle them
    /// in some customized way).
    ///
    void *		(*match)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
							    ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);

    /// Pointer to a function that matches the next token/char in the input stream
    /// regardless of what it actually is.
    ///
    void		(*matchAny)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
    
	/// Pointer to a function that decides if the token ahead of the current one is the 
	/// one we were loking for, in which case the curernt one is very likely extraneous
	/// and can be reported that way.
	///
	ANTLR3_BOOLEAN
				(*mismatchIsUnwantedToken)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, ANTLR3_UINT32 ttype);

	/// Pointer to a function that decides if the current token is one that can logically
	/// follow the one we were looking for, in which case the one we were looking for is 
	/// probably missing from the input.
	///
	ANTLR3_BOOLEAN
				(*mismatchIsMissingToken)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, pANTLR3_BITSET_LIST follow);

    /** Pointer to a function that works out what to do when a token mismatch
     *  occurs, so that Tree parsers can behave differently to other recognizers.
     */
    void		(*mismatch)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
					    ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);

    /** Pointer to a function to call to report a recognition problem. You may override
     *  this function with your own function, but refer to the standard implementation
     *  in antlr3baserecognizer.c for guidance. The function should recognize whether 
     *  error recovery is in force, so that it does not print out more than one error messages
     *  for the same error. From the java comments in BaseRecognizer.java:
     *
     *  This method sets errorRecovery to indicate the parser is recovering
     *  not parsing.  Once in recovery mode, no errors are generated.
     *  To get out of recovery mode, the parser must successfully match
     *  a token (after a resync).  So it will go:
     *
     * 		1. error occurs
     * 		2. enter recovery mode, report error
     * 		3. consume until token found in resynch set
     * 		4. try to resume parsing
     * 		5. next match() will reset errorRecovery mode
     */
    void		(*reportError)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Pointer to a function that is called to display a recognition error message. You may
     *  override this function independently of (*reportError)() above as that function calls
     *  this one to do the actual exception printing.
     */
    void		(*displayRecognitionError)  (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_UINT8 * tokenNames);

	/// Get number of recognition errors (lexer, parser, tree parser).  Each
	/// recognizer tracks its own number.  So parser and lexer each have
	/// separate count.  Does not count the spurious errors found between
	/// an error and next valid token match
	///
	/// \see reportError()
	///
	ANTLR3_UINT32
				(*getNumberOfSyntaxErrors)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Pointer to a function that recovers from an error found in the input stream.
     *  Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also
     *  be from a mismatched token that the (*match)() could not recover from.
     */
    void		(*recover)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Pointer to a function that is a hook to listen to token consumption during error recovery.
     *  This is mainly used by the debug parser to send events to the listener.
     */
    void		(*beginResync)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Pointer to a function that is a hook to listen to token consumption during error recovery.
     *  This is mainly used by the debug parser to send events to the listener.
     */
    void		(*endResync)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

	/** Pointer to a function that is a hook to listen to token consumption during error recovery.
     *  This is mainly used by the debug parser to send events to the listener.
     */
    void		(*beginBacktrack)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level);

    /** Pointer to a function that is a hook to listen to token consumption during error recovery.
     *  This is mainly used by the debug parser to send events to the listener.
     */
    void		(*endBacktrack)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);

    /** Pointer to a function to computer the error recovery set for the current rule.
     *  \see antlr3ComputeErrorRecoverySet() for details.
     */
    pANTLR3_BITSET	(*computeErrorRecoverySet)  (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Pointer to a function that computes the context-sensitive FOLLOW set for the 
     *  current rule.
     * \see antlr3ComputeCSRuleFollow() for details.
     */
    pANTLR3_BITSET	(*computeCSRuleFollow)	    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Pointer to a function to combine follow bitsets.
     * \see antlr3CombineFollows() for details.
     */
    pANTLR3_BITSET	(*combineFollows)	    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 
							    ANTLR3_BOOLEAN exact);
 
    /** Pointer to a function that recovers from a mismatched token in the input stream.
     * \see antlr3RecoverMismatch() for details.
     */
    void		* (*recoverFromMismatchedToken)
						    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
							    ANTLR3_UINT32	ttype,
							    pANTLR3_BITSET_LIST	follow);

    /** Pointer to a function that recovers from a mismatched set in the token stream, in a similar manner
     *  to (*recoverFromMismatchedToken)
     */
    void		* (*recoverFromMismatchedSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
							    pANTLR3_BITSET_LIST	follow);

    /** Pointer to common routine to handle single token insertion for recovery functions.
     */
    ANTLR3_BOOLEAN	(*recoverFromMismatchedElement)
						    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
							    pANTLR3_BITSET_LIST	follow);
    
    /** Pointer to function that consumes input until the next token matches
     *  the given token.
     */
    void		(*consumeUntil)		    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
							    ANTLR3_UINT32   tokenType);

    /** Pointer to function that consumes input until the next token matches
     *  one in the given set.
     */
    void		(*consumeUntilSet)	    (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
							    pANTLR3_BITSET	set);

    /** Pointer to function that returns an ANTLR3_LIST of the strings that identify
     *  the rules in the parser that got you to this point. Can be overridden by installing your
     *	own function set.
     *
     * \todo Document how to override invocation stack functions.
     */
    pANTLR3_STACK	(*getRuleInvocationStack)	(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
    pANTLR3_STACK	(*getRuleInvocationStackNamed)  (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
								pANTLR3_UINT8	    name);

    /** Pointer to a function that converts an ANLR3_LIST of tokens to an ANTLR3_LIST of
     *  string token names. As this is mostly used in string template processing it may not be useful
     *  in the C runtime.
     */
    pANTLR3_HASH_TABLE	(*toStrings)			(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
								pANTLR3_HASH_TABLE);

    /** Pointer to a function to return whether the rule has parsed input starting at the supplied 
     *  start index before. If the rule has not parsed input starting from the supplied start index,
     *  then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
     *  then it will return the point where it last stopped parsing after that start point.
     */
    ANTLR3_MARKER	(*getRuleMemoization)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
								ANTLR3_INTKEY	ruleIndex,
								ANTLR3_MARKER	ruleParseStart);

    /** Pointer to function that determines whether the rule has parsed input at the current index
     *  in the input stream
     */
    ANTLR3_BOOLEAN	(*alreadyParsedRule)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
								ANTLR3_MARKER	ruleIndex);

    /** Pointer to function that records whether the rule has parsed the input at a 
     *  current position successfully or not.
     */
    void		(*memoize)			(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
								ANTLR3_MARKER	ruleIndex,
								ANTLR3_MARKER	ruleParseStart);

	/// Pointer to a function that returns the current input symbol.
    /// The is placed into any label for the associated token ref; e.g., x=ID.  Token
	/// and tree parsers need to return different objects. Rather than test
	/// for input stream type or change the IntStream interface, I use
	/// a simple method to ask the recognizer to tell me what the current
	/// input symbol is.
	///
	/// This is ignored for lexers and the lexer implementation of this
	/// function should return NULL.
	///
	void *		(*getCurrentInputSymbol)	(	struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, 
												pANTLR3_INT_STREAM istream);

	/// Conjure up a missing token during error recovery.
	///
	/// The recognizer attempts to recover from single missing
	/// symbols. But, actions might refer to that missing symbol.
	/// For example, x=ID {f($x);}. The action clearly assumes
	/// that there has been an identifier matched previously and that
	/// $x points at that token. If that token is missing, but
	/// the next token in the stream is what we want we assume that
	/// this token is missing and we keep going. Because we
	/// have to return some token to replace the missing token,
	/// we have to conjure one up. This method gives the user control
	/// over the tokens returned for missing tokens. Mostly,
	/// you will want to create something special for identifier
	/// tokens. For literals such as '{' and ',', the default
	/// action in the parser or tree parser works. It simply creates
	/// a CommonToken of the appropriate type. The text will be the token.
	/// If you change what tokens must be created by the lexer,
	/// override this method to create the appropriate tokens.
	///
	void *		(*getMissingSymbol)			(	struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
												pANTLR3_INT_STREAM		istream,
												pANTLR3_EXCEPTION		e,
												ANTLR3_UINT32			expectedTokenType,
												pANTLR3_BITSET_LIST		follow);

    /** Pointer to a function that returns whether the supplied grammar function
     *  will parse the current input stream or not. This is the way that syntactic
     *  predicates are evaluated. Unlike java, C is perfectly happy to invoke code
     *  via a pointer to a function (hence that's what all the ANTLR3 C interfaces 
     *  do.
     */
    ANTLR3_BOOLEAN	(*synpred)			(	struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,  void * ctx,
											void (*predicate)(void * ctx));

    /** Pointer to a function that can construct a generic exception structure
     * with such information as the input stream can provide.
     */
    void		    (*exConstruct)		(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Reset the recognizer
     */
    void		    (*reset)			(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

    /** Pointer to a function that knows how to free the resources of a base recognizer.
     */
    void			(*free)				(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);

}
    ANTLR3_BASE_RECOGNIZER;

#ifdef __cplusplus
}
#endif

#include    <antlr3lexer.h>
#include    <antlr3parser.h>
#include    <antlr3treeparser.h>

/// @}
///

#endif	    /* _ANTLR3_BASERECOGNIZER_H	*/