1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
|
/** \file
* Defines the basic structures used to manipulate character
* streams from any input source. Any character size and encoding
* can in theory be used, so long as a set of functions is provided that
* can return a 32 bit Integer representation of their characters and efficiently mark and revert
* to specific offsets into their input streams.
*/
#ifndef _ANTLR3_INPUT_H
#define _ANTLR3_INPUT_H
// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <antlr3defs.h>
#include <antlr3string.h>
#include <antlr3commontoken.h>
#include <antlr3intstream.h>
#include <antlr3convertutf.h>
#ifdef __cplusplus
extern "C" {
#endif
/// Master context structure for an input stream based on the ANTLR3 C runtime.
/// It bundles the stream state (buffer pointers, line tracking, mark stack)
/// with the table of function pointers that make up the stream's API.
/// \ingroup apistructures
///
typedef struct ANTLR3_INPUT_STREAM_struct
{
    /* ---------------------------------------------------------------- */
    /* State                                                            */
    /* ---------------------------------------------------------------- */

    /** Generic ANTLR3_INT_STREAM interface. Every interface that provides
     *  a stream must supply one, and an ANTLR3_INPUT_STREAM is no exception.
     */
    pANTLR3_INT_STREAM      istream;

    /** Pointer to whatever super structure is providing this INPUT stream,
     *  so that it can be handed back whenever the API functions are called
     *  back from this interface.
     */
    void *                  super;

    /** Pointer to the start of the input string. Characters may be taken
     *  as offsets from here, and are in the original input format encoding.
     */
    void *                  data;

    /** Indicates whether the data pointer was allocated by us, and so
     *  should be freed when the stream dies.
     */
    int                     isAllocated;

    /** String factory for this input stream.
     */
    pANTLR3_STRING_FACTORY  strFactory;

    /** Pointer to the next character to be consumed from the input data.
     *  It is cast to point at the encoding of the original file that was
     *  read, by the functions installed as pointers in this input stream
     *  context instance at file/string/whatever load time.
     */
    void *                  nextChar;

    /** Number of characters that can be consumed at this point in time.
     *  Mostly this is just what is left in the pre-read buffer, but if the
     *  input source is a stream such as a socket then special read code
     *  may be called to wait for more input.
     */
    ANTLR3_UINT32           sizeBuf;

    /** The line number being traversed in the input file. Incremented by
     *  a newline() call in the lexer grammar actions.
     */
    ANTLR3_UINT32           line;

    /** Pointer into the input buffer where the current line started.
     */
    void *                  currentLine;

    /** Offset of the current character within the current line.
     */
    ANTLR3_INT32            charPositionInLine;

    /** Tracks how deeply mark() calls are nested.
     */
    ANTLR3_UINT32           markDepth;

    /** List of mark() points in the input stream.
     */
    pANTLR3_VECTOR          markers;

    /** File name string. If you set it manually, set it to a pointer to
     *  memory, as it will be free()d.
     */
    pANTLR3_STRING          fileName;

    /** File number; must be set manually to some file index of your own
     *  devising.
     */
    ANTLR3_UINT32           fileNo;

    /* ---------------------------------------------------------------- */
    /* API                                                              */
    /* ---------------------------------------------------------------- */

    /** Pointer to function that closes the input stream.
     */
    void            (*close)(struct ANTLR3_INPUT_STREAM_struct * input);

    /** NOTE(review): this member carries no documentation in the original
     *  header. By its name it presumably releases the stream - confirm
     *  against the implementation.
     */
    void            (*free)(struct ANTLR3_INPUT_STREAM_struct * input);

    /** Pointer to function that resets the input stream.
     */
    void            (*reset)(struct ANTLR3_INPUT_STREAM_struct * input);

    /** Pointer to a function that reuses and resets an input stream by
     *  supplying a new 'source'.
     */
    void            (*reuse)(struct ANTLR3_INPUT_STREAM_struct * input,
                             pANTLR3_UINT8 inString,
                             ANTLR3_UINT32 size,
                             pANTLR3_UINT8 name);

    /** Pointer to function that installs a version of LA that always
     *  returns upper case. Only valid for character streams; it creates a
     *  case insensitive lexer provided the lexer tokens are described in
     *  upper case. The tokens preserve case in the token text.
     */
    void            (*setUcaseLA)(pANTLR3_INPUT_STREAM input,
                                  ANTLR3_BOOLEAN flag);

    /** Pointer to function that returns the input stream element at a
     *  1 based offset from nextChar. Same as _LA for a char stream, but
     *  token streams etc. have one of these that does other things.
     */
    void *          (*_LT)(struct ANTLR3_INPUT_STREAM_struct * input,
                           ANTLR3_INT32 lt);

    /** Pointer to function that returns the total size of the input
     *  buffer. For streams this may be just the total available so far,
     *  which means the input stream must be careful to accumulate enough
     *  input that any backtracking can be satisfied.
     */
    ANTLR3_UINT32   (*size)(struct ANTLR3_INPUT_STREAM_struct * input);

    /** Pointer to function that returns a substring of the input stream.
     *  The string is returned in allocated memory and is in the same
     *  encoding as the input stream itself, NOT internal ANTLR3_UCHAR form.
     */
    pANTLR3_STRING  (*substr)(struct ANTLR3_INPUT_STREAM_struct * input,
                              ANTLR3_MARKER start,
                              ANTLR3_MARKER stop);

    /** Pointer to function that returns the current line number in the
     *  input stream.
     */
    ANTLR3_UINT32   (*getLine)(struct ANTLR3_INPUT_STREAM_struct * input);

    /** Pointer to function that returns the current line buffer in the
     *  input stream. The returned pointer is directly into the input
     *  stream, so you must copy it if you wish to manipulate it without
     *  damaging the input stream. The encoding is the same as that of the
     *  input stream.
     *  \remark
     *   - Note that this function will be inaccurate if setLine is called,
     *     as there is at the moment no way to position the input stream at
     *     a particular line number offset.
     */
    void *          (*getLineBuf)(struct ANTLR3_INPUT_STREAM_struct * input);

    /** Pointer to function that returns the current offset in the current
     *  input stream line.
     */
    ANTLR3_UINT32   (*getCharPositionInLine)(struct ANTLR3_INPUT_STREAM_struct * input);

    /** Pointer to function that sets the current line number in the input
     *  stream.
     */
    void            (*setLine)(struct ANTLR3_INPUT_STREAM_struct * input,
                               ANTLR3_UINT32 line);

    /** Pointer to function that sets the current position in the current
     *  line.
     */
    void            (*setCharPositionInLine)(struct ANTLR3_INPUT_STREAM_struct * input,
                                             ANTLR3_UINT32 position);

    /** Pointer to function that overrides the default newline character
     *  the input stream looks for to trigger the line/offset and line
     *  buffer recording information.
     *  \remark
     *   - By default the character '\n' is installed as the newline
     *     trigger character. When the consume() function sees this
     *     character, the current line number is incremented and the
     *     current line offset is reset to 0. The pointer for the line of
     *     input being consumed is updated to point to the next character
     *     after this one in the input stream (which means it may become
     *     invalid if the last newline character in the file is seen, so
     *     watch out).
     *   - If for some reason you do not want the counters and pointers to
     *     be reset, you can set the character to some impossible
     *     character such as '\0' or whatever.
     *   - This is a single character only, so choose the last character
     *     in a sequence of two or more.
     *   - This is only a simple aid to error reporting. If you have a
     *     complicated binary input structure it may not be adequate, but
     *     you can always override every function in the input stream with
     *     your own, and can even write your own complete input stream set
     *     if you like.
     *   - It is your responsibility to set a valid character for the
     *     input stream type. There is no point setting this to 0xFFFFFFFF
     *     if the input stream is 8 bit ASCII, as this will just be
     *     truncated and never trigger, because the comparison will be
     *     (INT32)0xFF == (INT32)0xFFFFFFFF
     */
    void            (*SetNewLineChar)(struct ANTLR3_INPUT_STREAM_struct * input,
                                      ANTLR3_UINT32 newlineChar);

    /* ---------------------------------------------------------------- */
    /* Character configuration                                          */
    /* ---------------------------------------------------------------- */

    /// Character that automatically causes an internal line count
    /// increment.
    ///
    ANTLR3_UCHAR    newlineChar;

    /// Size of a single character, in 8 bit units. Note that the C runtime
    /// does not deal with surrogates, as this would be slow and
    /// complicated. For a UTF-8 stream this field will be set to 0.
    /// Generally you are best working internally with 32 bit characters,
    /// as this is the most efficient.
    ///
    ANTLR3_UINT8    charByteSize;

    /// Indicates the encoding scheme used in this input stream.
    ///
    ANTLR3_UINT32   encoding;
} ANTLR3_INPUT_STREAM;
/** \brief Structure for tracking lexer input states as part of mark()
 *  and rewind() of the lexer.
 *
 *  Its four members carry the same names and types as the position-related
 *  members of ANTLR3_INPUT_STREAM (nextChar, line, currentLine,
 *  charPositionInLine).
 */
typedef struct ANTLR3_LEX_STATE_struct
{
    /** Pointer to the next character to be consumed from the input data.
     *  It is cast to point at the encoding of the original file that was
     *  read, by the functions installed as pointers in this input stream
     *  context instance at file/string/whatever load time.
     */
    void *          nextChar;

    /** The line number being traversed in the input file. Incremented by
     *  a newline() call in the lexer grammar actions.
     */
    ANTLR3_UINT32   line;

    /** Pointer into the input buffer where the current line started.
     */
    void *          currentLine;

    /** Offset of the current character within the current line.
     */
    ANTLR3_INT32    charPositionInLine;
} ANTLR3_LEX_STATE;
/* Prototypes for the stream setup routines. Each takes the input stream
 * context to operate on; the UTF16 and UTF32 variants additionally take
 * flags for the byte order of the machine and of the input.
 *
 * NOTE(review): the implementations are not part of this header. Judging by
 * the names, each routine presumably installs the function pointers suited
 * to the named encoding into the supplied stream - confirm against the
 * corresponding source files.
 */
void antlr38BitSetupStream(pANTLR3_INPUT_STREAM input);
void antlr3UTF16SetupStream(pANTLR3_INPUT_STREAM input,
                            ANTLR3_BOOLEAN machineBigEndian,
                            ANTLR3_BOOLEAN inputBigEndian);
void antlr3UTF32SetupStream(pANTLR3_INPUT_STREAM input,
                            ANTLR3_BOOLEAN machineBigEndian,
                            ANTLR3_BOOLEAN inputBigEndian);
void antlr3UTF8SetupStream(pANTLR3_INPUT_STREAM input);
void antlr3EBCDICSetupStream(pANTLR3_INPUT_STREAM input);
void antlr3GenericSetupStream(pANTLR3_INPUT_STREAM input);
#ifdef __cplusplus
}
#endif
#endif /* _ANTLR3_INPUT_H */
|