summaryrefslogtreecommitdiff
path: root/antlr/libantlr3c-3.4/include/antlr3commontoken.h
blob: c48068edd75346854c407443f95e44c01a1c79a3 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
/** \file
 * \brief Defines the interface for a common token.
 *
 * All token streams should provide their tokens using an instance
 * of this common token. A custom pointer is provided, wher you may attach
 * a further structure to enhance the common token if you feel the need
 * to do so. The C runtime will assume that a token provides implementations
 * of the interface functions, but all of them may be rplaced by your own
 * implementation if you require it.
 */
#ifndef	_ANTLR3_COMMON_TOKEN_H
#define	_ANTLR3_COMMON_TOKEN_H

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include    <antlr3defs.h>

/** How many tokens to allocate at once in the token factory
 */
#define	ANTLR3_FACTORY_POOL_SIZE    1024

/* Base token types, which all lexer/parser tokens come after in sequence.
 */

/** Indicator of an invalid token
 */
#define	ANTLR3_TOKEN_INVALID	0

#define	ANTLR3_EOR_TOKEN_TYPE	1

/** Imaginary token type to cause a traversal of child nodes in a tree parser
 */
#define	ANTLR3_TOKEN_DOWN		2

/** Imaginary token type to signal the end of a stream of child nodes.
 */
#define	ANTLR3_TOKEN_UP		3

/** First token that can be used by users/generated code
 */

#define	ANTLR3_MIN_TOKEN_TYPE	ANTLR3_TOKEN_UP + 1

/** End of file token
 */
#define	ANTLR3_TOKEN_EOF	(ANTLR3_CHARSTREAM_EOF & 0xFFFFFFFF)

/** Default channel for a token
 */
#define	ANTLR3_TOKEN_DEFAULT_CHANNEL	0

/** Reserved channel number for a HIDDEN token - a token that
 *  is hidden from the parser.
 */
#define	HIDDEN				99

#ifdef __cplusplus
extern "C" {
#endif

// Indicates whether this token is carrying:
//
// State | Meaning
// ------+--------------------------------------
//     0 | Nothing (neither rewrite text, nor setText)
//     1 | char * to user supplied rewrite text
//     2 | pANTLR3_STRING because of setText or similar action
//
#define	ANTLR3_TEXT_NONE	0
#define	ANTLR3_TEXT_CHARP	1
#define	ANTLR3_TEXT_STRING	2

/** The definition of an ANTLR3 common token structure, which all implementations
 * of a token stream should provide, installing any further structures in the
 * custom pointer element of this structure.
 *
 * \remark
 * Token streams are in essence provided by lexers or other programs that serve
 * as lexers.
 */
typedef	struct ANTLR3_COMMON_TOKEN_struct
{
    /** The actual type of this token
     */
    ANTLR3_UINT32   type;

    /** Indicates that a token was produced from the token factory and therefore
     *  the the freeToken() method should not do anything itself because
     *  token factory is responsible for deleting it.
     */
    ANTLR3_BOOLEAN  factoryMade;

	/// A string factory that we can use if we ever need the text of a token
	/// and need to manufacture a pANTLR3_STRING
	///
	pANTLR3_STRING_FACTORY	strFactory;

    /** The line number in the input stream where this token was derived from
     */
    ANTLR3_UINT32   line;

    /** The offset into the input stream that the line in which this
     *  token resides starts.
     */
    void	    * lineStart;

    /** The character position in the line that this token was derived from
     */
    ANTLR3_INT32    charPosition;

    /** The virtual channel that this token exists in.
     */
    ANTLR3_UINT32   channel;

    /** Pointer to the input stream that this token originated in.
     */
    pANTLR3_INPUT_STREAM    input;

    /** What the index of this token is, 0, 1, .., n-2, n-1 tokens
     */
    ANTLR3_MARKER   index;

    /** The character offset in the input stream where the text for this token
     *  starts.
     */
    ANTLR3_MARKER   start;

    /** The character offset in the input stream where the text for this token
     *  stops.
     */
    ANTLR3_MARKER   stop;

	/// Indicates whether this token is carrying:
	///
	/// State | Meaning
	/// ------+--------------------------------------
	///     0 | Nothing (neither rewrite text, nor setText)
	///     1 | char * to user supplied rewrite text
	///     2 | pANTLR3_STRING because of setText or similar action
	///
	/// Affects the union structure tokText below
	/// (uses 32 bit so alignment is always good)
	///
	ANTLR3_UINT32	textState;

	union
	{
		/// Pointer that is used when the token just has a pointer to
		/// a char *, such as when a rewrite of an imaginary token supplies
		/// a string in the grammar. No sense in constructing a pANTLR3_STRING just
		/// for that, as mostly the text will not be accessed - if it is, then
		/// we will build a pANTLR3_STRING for it a that point.
		///
		pANTLR3_UCHAR	chars;

		/// Some token types actually do carry around their associated text, hence
		/// (*getText)() will return this pointer if it is not NULL
		///
		pANTLR3_STRING	text;
	}
		tokText;

    /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
     *   as the standard structure for a token, a number of user programmable 
     *	 elements are allowed in a token. This is one of them.
     */
    ANTLR3_UINT32   user1;
    
    /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
     *   as the standard structure for a token, a number of user programmable 
     *	 elements are allowed in a token. This is one of them.
     */
    ANTLR3_UINT32   user2;

    /**  Because it is a bit more of a hassle to override an ANTLR3_COMMON_TOKEN
     *   as the standard structure for a token, a number of user programmable 
     *	 elements are allowed in a token. This is one of them.
     */
    ANTLR3_UINT32   user3;

    /** Pointer to a custom element that the ANTLR3 programmer may define and install
     */
    void    * custom;

    /** Pointer to a function that knows how to free the custom structure when the 
     *  token is destroyed.
     */
    void    (*freeCustom)(void * custom);

    /* ==============================
     * API 
     */

    /** Pointer to function that returns the text pointer of a token, use
     *  toString() if you want a pANTLR3_STRING version of the token.
     */
    pANTLR3_STRING  (*getText)(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that 'might' be able to set the text associated
     *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
     *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actaully have
     *  strings associated with them but just point into the current input stream. These
     *  tokens will implement this function with a function that errors out (probably
     *  drastically.
     */
    void	    (*setText)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_STRING text);

    /** Pointer to a function that 'might' be able to set the text associated
     *  with a token. Imaginary tokens such as an ANTLR3_CLASSIC_TOKEN may actually
     *  do this, however many tokens such as ANTLR3_COMMON_TOKEN do not actully have
     *  strings associated with them but just point into the current input stream. These
     *  tokens will implement this function with a function that errors out (probably
     *  drastically.
     */
    void	    (*setText8)(struct ANTLR3_COMMON_TOKEN_struct * token, pANTLR3_UINT8 text);

    /** Pointer to a function that returns the token type of this token
     */
    ANTLR3_UINT32   (*getType)(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the type of this token
     */
    void	    (*setType)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 ttype);

    /** Pointer to a function that gets the 'line' number where this token resides
     */
    ANTLR3_UINT32   (*getLine)(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the 'line' number where this token reside
     */
    void	    (*setLine)(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 line);

    /** Pointer to a function that gets the offset in the line where this token exists
     */ 
    ANTLR3_INT32    (*getCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the offset in the line where this token exists
     */
    void	    (*setCharPositionInLine)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_INT32 pos);

    /** Pointer to a function that gets the channel that this token was placed in (parsers
     *  can 'tune' to these channels.
     */
    ANTLR3_UINT32   (*getChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the channel that this token should belong to
     */
    void	    (*setChannel)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_UINT32 channel);

    /** Pointer to a function that returns an index 0...n-1 of the token in the token
     *  input stream.
     */
    ANTLR3_MARKER   (*getTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that can set the token index of this token in the token
     *  input stream.
     */
    void			(*setTokenIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER);

    /** Pointer to a function that gets the start index in the input stream for this token.
     */
    ANTLR3_MARKER   (*getStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the start index in the input stream for this token.
     */
    void			(*setStartIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);
    
    /** Pointer to a function that gets the stop index in the input stream for this token.
     */
    ANTLR3_MARKER   (*getStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token);

    /** Pointer to a function that sets the stop index in the input stream for this token.
     */
    void			(*setStopIndex)	(struct ANTLR3_COMMON_TOKEN_struct * token, ANTLR3_MARKER index);

    /** Pointer to a function that returns this token as a text representation that can be 
     *  printed with embedded control codes such as \n replaced with the printable sequence "\\n"
     *  This also yields a string structure that can be used more easily than the pointer to 
     *  the input stream in certain situations.
     */
    pANTLR3_STRING  (*toString)		(struct ANTLR3_COMMON_TOKEN_struct * token);
}
    ANTLR3_COMMON_TOKEN;

/** \brief ANTLR3 Token factory interface to create lots of tokens efficiently
 *  rather than creating and freeing lots of little bits of memory.
 */
typedef	struct ANTLR3_TOKEN_FACTORY_struct
{
    /** Pointers to the array of tokens that this factory has produced so far
     */
    pANTLR3_COMMON_TOKEN    *pools;

    /** Current pool tokens we are allocating from
     */
    ANTLR3_INT32	    thisPool;

    /** Maximum pool count we have available
     */
    ANTLR3_INT32            maxPool;

    /** The next token to throw out from the pool, will cause a new pool allocation
     *  if this exceeds the available tokenCount
     */
    ANTLR3_UINT32	    nextToken;

    /** Trick to initialize tokens and their API quickly, we set up this token when the
     *  factory is created, then just copy the memory it uses into the new token.
     */
    ANTLR3_COMMON_TOKEN	    unTruc;

    /** Pointer to an input stream that is using this token factory (may be NULL)
     *  which will be assigned to the tokens automatically.
     */
    pANTLR3_INPUT_STREAM    input;

    /** Pointer to a function that returns a new token
     */
    pANTLR3_COMMON_TOKEN    (*newToken)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);

    /** Pointer to a function that resets the factory so you can reuse the pools it
     *  has laready allocated
     */
    void                    (*reset)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);

    /** Pointer to a function that changes teh curent inptu stream so that
     *  new tokens are created with reference to their originating text.
     */
    void		    (*setInputStream)	(struct ANTLR3_TOKEN_FACTORY_struct * factory, pANTLR3_INPUT_STREAM input);
    /** Pointer to a function the destroys the factory
     */
    void		    (*close)	    (struct ANTLR3_TOKEN_FACTORY_struct * factory);
}
    ANTLR3_TOKEN_FACTORY;

#ifdef __cplusplus
}
#endif

#endif