1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
|
/** \file
* Simple string interface that allows strings to be allocated freely,
* anywhere in the code, and then released in one chunk via a string
* factory. This saves a lot of hassle in remembering which strings were
* allocated where.
*/
#ifndef _ANTLR3_STRING_H
#define _ANTLR3_STRING_H
// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <antlr3defs.h>
#include <antlr3collections.h>
#ifdef __cplusplus
extern "C" {
#endif
/** Base string class. It tracks the allocation of a single string and provides
* simple string manipulation functions via the function pointers below.
* Mostly you can work directly on the string (->chars) for things that don't
* reallocate it, like strchr() etc. Perhaps someone will want to provide
* implementations for UTF8 and so on.
*
* Naming convention used by the function pointers in this structure:
*  - no suffix (set, append, insert, compare): the raw pointer argument is in
*    the same encoding as the string itself;
*  - "8" suffix (set8, append8, insert8, compare8): the raw pointer argument
*    points to 8 bit characters, regardless of the encoding of the string;
*  - "S" suffix (setS, appendS, insertS, compareS): the argument is another
*    ANTLR3_STRING in the same encoding.
*
* NOTE(review): the meaning of the pANTLR3_UINT8 value returned by the
* modifying functions is not stated in this header - presumably it is the
* (possibly reallocated) ->chars pointer; confirm against the implementation.
*/
typedef struct ANTLR3_STRING_struct
{
/** The factory that created this string
*/
pANTLR3_STRING_FACTORY factory;
/** Pointer to the current string value (starts at NULL unless
* the string allocator is told to create it with a pre known size).
*/
pANTLR3_UINT8 chars;
/** Current length of the string up to, and not including, the trailing '\0'.
* Note that the actual allocation (->size)
* is always at least one byte more than this to accommodate the trailing '\0'
*/
ANTLR3_UINT32 len;
/** Current size of the string in bytes including the trailing '\0'
*/
ANTLR3_UINT32 size;
/** Index of string (allocation number) in case someone wants
* to explicitly release it.
*/
ANTLR3_UINT32 index;
/** Occasionally it is useful to know what the encoding of the string
* actually is, hence it is stored here as one of the ANTLR3_ENCODING_ values
*/
ANTLR3_UINT8 encoding;
/** Pointer to function that sets the string value to a specific string in the default encoding
* for this string. For instance, if this is 8 bit, then this function is the same as set8
* but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters, not
* 8 bit.
*/
pANTLR3_UINT8 (*set) (struct ANTLR3_STRING_struct * string, const char * chars);
/** Pointer to function that sets the string value to a specific 8 bit string.
* For instance, if this is an 8 bit string, then this function is the same as set
* but if the encoding is UTF16, then the pointer is assumed to point to 8 bit characters that must
* be converted to UTF16 characters on the fly.
*/
pANTLR3_UINT8 (*set8) (struct ANTLR3_STRING_struct * string, const char * chars);
/** Pointer to function that appends a raw char * type pointer in the default encoding
* for this string. For instance, if this is 8 bit, then this function is the same as append8
* but if the encoding is UTF16, then the pointer is assumed to point to UTF16 characters not
* 8 bit.
*/
pANTLR3_UINT8 (*append) (struct ANTLR3_STRING_struct * string, const char * newbit);
/** Pointer to function that appends a raw char * type pointer that points to 8 bit characters,
* whatever the encoding of this string. For instance, if this is a UTF16 string, then this
* function assumes the pointer points to 8 bit characters that must be converted on the fly.
*/
pANTLR3_UINT8 (*append8) (struct ANTLR3_STRING_struct * string, const char * newbit);
/** Pointer to function that inserts the supplied string at the specified
* offset in the current string in the default encoding for this string. For instance, if this is an 8
* bit string, then this is the same as insert8, but if this is a UTF16 string, then the pointer
* must point to UTF16 characters.
*/
pANTLR3_UINT8 (*insert) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
/** Pointer to function that inserts the supplied 8 bit string at the specified
* offset in the current string, whatever the encoding of this string. For instance, if this is a
* UTF16 string then the pointer is assumed to point at 8 bit characters that must be converted
* on the fly.
*/
pANTLR3_UINT8 (*insert8) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, const char * newbit);
/** Pointer to function that sets the string value to a copy of the supplied string (strings must be in the
* same encoding).
*/
pANTLR3_UINT8 (*setS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * chars);
/** Pointer to function that appends a copy of the characters contained in another string. Strings must be
* in the same encoding.
*/
pANTLR3_UINT8 (*appendS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * newbit);
/** Pointer to function that inserts a copy of the characters in the supplied string at the specified
* offset in the current string. Strings must be in the same encoding.
*/
pANTLR3_UINT8 (*insertS) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, struct ANTLR3_STRING_struct * newbit);
/** Pointer to function that inserts the supplied integer in string form at the specified
* offset in the current string.
*/
pANTLR3_UINT8 (*inserti) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
/** Pointer to function that adds a single character to the end of the string, in the encoding of the
* string - 8 bit, UTF16, utf-8 etc. Input is a single UTF32 (32 bits wide integer) character.
*/
pANTLR3_UINT8 (*addc) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 c);
/** Pointer to function that adds the stringified representation of an integer
* to the string.
*/
pANTLR3_UINT8 (*addi) (struct ANTLR3_STRING_struct * string, ANTLR3_INT32 i);
/** Pointer to function that compares the text of a string to the supplied
* 8 bit character string and returns a result a la strcmp()
*
* NOTE(review): the declared return type is unsigned (ANTLR3_UINT32), so a negative
* strcmp()-style result cannot be observed as negative by the caller; testing the
* result against zero is presumably the only portable use - confirm against the implementation.
*/
ANTLR3_UINT32 (*compare8) (struct ANTLR3_STRING_struct * string, const char * compStr);
/** Pointer to a function that compares the text of a string with the supplied character string
* (which is assumed to be in the same encoding as the string itself) and returns a result
* a la strcmp()
*
* NOTE(review): the return type is unsigned, see the note on compare8.
*/
ANTLR3_UINT32 (*compare) (struct ANTLR3_STRING_struct * string, const char * compStr);
/** Pointer to a function that compares the text of a string with the supplied string
* (which is assumed to be in the same encoding as the string itself) and returns a result
* a la strcmp()
*
* NOTE(review): the return type is unsigned, see the note on compare8.
*/
ANTLR3_UINT32 (*compareS) (struct ANTLR3_STRING_struct * string, struct ANTLR3_STRING_struct * compStr);
/** Pointer to a function that returns the character indexed at the supplied
* offset as a 32 bit character.
*/
ANTLR3_UCHAR (*charAt) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 offset);
/** Pointer to a function that returns a substring of the supplied string a la .subString(s,e)
* in the Java language.
*/
struct ANTLR3_STRING_struct *
(*subString) (struct ANTLR3_STRING_struct * string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
/** Pointer to a function that returns the integer representation of any numeric characters
* at the beginning of the string
*/
ANTLR3_INT32 (*toInt32) (struct ANTLR3_STRING_struct * string);
/** Pointer to a function that yields an 8 bit string regardless of the encoding of the supplied
* string. This is useful when you want to use the text of a token in some way that requires an 8 bit
* value, such as the key for a hashtable. The function is required to produce a usable string even
* if the text given as input has characters that do not fit in 8 bit space; it will replace them
* with some arbitrary character such as '?'
*/
struct ANTLR3_STRING_struct *
(*to8) (struct ANTLR3_STRING_struct * string);
/// Pointer to a function that yields a UTF8 encoded copy of the current string,
/// regardless of the current encoding of the string. Because there is currently no UTF8
/// handling in the string class, the string it creates is useful only for read only
/// applications, as it will not contain methods that deal with UTF8 at the moment.
///
struct ANTLR3_STRING_struct *
(*toUTF8) (struct ANTLR3_STRING_struct * string);
}
ANTLR3_STRING;
/** Definition of the string factory interface, which creates and tracks
* strings for you of various shapes and sizes.
*
* Naming convention used by the constructor function pointers:
*  - newPtr / newStr take a pointer to characters in the same encoding as
*    the strings this factory produces;
*  - newPtr8 / newStr8 take a pointer to 8 bit characters, which are converted
*    on the fly to the encoding of the string being created;
*  - the "Ptr" variants take an explicit size argument, the "Str" variants
*    take only the pointer.
*/
typedef struct ANTLR3_STRING_FACTORY_struct
{
/** List of all the strings that have been allocated by the factory
*/
pANTLR3_VECTOR strings;
/** Index of next string that we allocate
*/
ANTLR3_UINT32 index;
/** Pointer to function that manufactures an empty string
*/
pANTLR3_STRING (*newRaw) (struct ANTLR3_STRING_FACTORY_struct * factory);
/** Pointer to function that manufactures a raw string with no text in it but space for size
* characters.
*/
pANTLR3_STRING (*newSize) (struct ANTLR3_STRING_FACTORY_struct * factory, ANTLR3_UINT32 size);
/** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed
* to point to characters in the same encoding as the string type, hence if this is a UTF16 string the
* pointer should point to UTF16 characters.
*/
pANTLR3_STRING (*newPtr) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
/** Pointer to function that manufactures a string from a given pointer and length. The pointer is assumed to
* point at 8 bit characters which must be converted on the fly to the encoding of the actual string.
*/
pANTLR3_STRING (*newPtr8) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
/** Pointer to function that manufactures a string from a given pointer and works out the length. The pointer is
* assumed to point to characters in the same encoding as the string itself, i.e. UTF16 if a UTF16
* string and so on.
*/
pANTLR3_STRING (*newStr) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
/** Pointer to function that manufactures a string from a given pointer; no length argument is supplied
* (presumably the length is worked out as for newStr - confirm against the implementation). The pointer
* should point to 8 bit characters regardless of the actual encoding of the string. The 8 bit characters
* will be converted to the actual string encoding on the fly.
*/
pANTLR3_STRING (*newStr8) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_UINT8 string);
/** Pointer to function that deletes the string altogether
*/
void (*destroy) (struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
/** Pointer to function that returns a copy of the string in printable form without any control
* characters in it.
*/
pANTLR3_STRING (*printable)(struct ANTLR3_STRING_FACTORY_struct * factory, pANTLR3_STRING string);
/** Pointer to function that closes the factory
*/
void (*close) (struct ANTLR3_STRING_FACTORY_struct * factory);
}
ANTLR3_STRING_FACTORY;
#ifdef __cplusplus
}
#endif
#endif
|