summaryrefslogtreecommitdiff
path: root/impl/antlr/libantlr3c-3.4/src/antlr3string.c
diff options
context:
space:
mode:
author: Carlo Zancanaro <carlo@carlo-laptop> 2012-05-03 15:35:39 +1000
committer: Carlo Zancanaro <carlo@carlo-laptop> 2012-05-03 15:35:39 +1000
commit: fcecd0e7dc0bf103986c02e2f29fb518cd5571c5 (patch)
tree: 518bf3fcb3733bb8cc2ef584346aa409ea618a77 /impl/antlr/libantlr3c-3.4/src/antlr3string.c
parent: 9fd34b8cdc98ee757fc047216bd51c698cb7b82f (diff)
Add a parser for linear equations
(Also add the antlr jar and C runtime)
Diffstat (limited to 'impl/antlr/libantlr3c-3.4/src/antlr3string.c')
-rw-r--r-- impl/antlr/libantlr3c-3.4/src/antlr3string.c 1402
1 files changed, 1402 insertions, 0 deletions
diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3string.c b/impl/antlr/libantlr3c-3.4/src/antlr3string.c
new file mode 100644
index 0000000..b29c020
--- /dev/null
+++ b/impl/antlr/libantlr3c-3.4/src/antlr3string.c
@@ -0,0 +1,1402 @@
+/** \file
+ * Implementation of the ANTLR3 string and string factory classes
+ */
+
+// [The "BSD licence"]
+// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
+// http://www.temporal-wave.com
+// http://www.linkedin.com/in/jimidle
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 3. The name of the author may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <antlr3string.h>
+
+/* Factory API
+ */
+static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory);
+static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory);
+static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
+static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
+static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
+static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
+static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
+static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
+static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
+static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
+static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
+static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
+static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
+static void closeFactory(pANTLR3_STRING_FACTORY factory);
+
+/* String API
+ */
+static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars);
+static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars);
+static pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars);
+static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit);
+static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit);
+static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit);
+static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
+static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
+static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
+
+static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars);
+static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit);
+static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
+
+static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c);
+static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c);
+static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i);
+static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i);
+static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
+static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
+
+static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr);
+static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr);
+static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
+static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr);
+static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
+static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
+static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
+static pANTLR3_STRING subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
+static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string);
+static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string);
+static pANTLR3_STRING to8_8 (pANTLR3_STRING string);
+static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string);
+static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string);
+static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string);
+
+/* Local helpers
+ */
+static void stringInit8 (pANTLR3_STRING string);
+static void stringInitUTF16 (pANTLR3_STRING string);
+static void ANTLR3_CDECL stringFree (pANTLR3_STRING string);
+
+/**
+ * Allocates a string factory and installs the string construction API that
+ * matches the requested encoding.
+ *
+ * \param[in] encoding One of the ANTLR3_ENC_* constants. The UTF16 variants get
+ *                     the UTF16 implementations; UTF8, EBCDIC, 8 bit and any
+ *                     unrecognized value get the 8 bit implementations. The
+ *                     UTF32 variants get no string construction functions.
+ * \return The new factory, or NULL if either the factory structure or its
+ *         tracking vector could not be allocated.
+ */
+ANTLR3_API pANTLR3_STRING_FACTORY
+antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
+{
+    pANTLR3_STRING_FACTORY factory;
+
+    /* Allocate memory (zero filled, so every function pointer starts as NULL)
+     */
+    factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
+
+    if (factory == NULL)
+    {
+        return NULL;
+    }
+
+    /* Now we make a new list to track the strings.
+     */
+    factory->strings = antlr3VectorNew(0);
+    factory->index = 0;
+
+    if (factory->strings == NULL)
+    {
+        ANTLR3_FREE(factory);
+        return NULL;
+    }
+
+    // The release functions do not depend on the encoding, so they are installed
+    // unconditionally. Without this a factory created for a UTF32 encoding would
+    // carry NULL close/destroy pointers and could never be freed by its owner.
+    //
+    factory->destroy = destroy;
+    factory->close = closeFactory;
+
+    // Install the API
+    //
+    // TODO: These encodings need equivalent functions to
+    // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
+    // The STRING stuff was intended as a quick and dirty hack for people that did not
+    // want to worry about memory and performance very much, but nobody ever reads the
+    // notes or comments or uses the email list search. I want to discourage using these
+    // interfaces as it is much more efficient to use the pointers within the tokens
+    // directly, so I am not implementing the string stuff for the newer encodings.
+    // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
+    // will not be useful beyond returning the text.
+    //
+    switch(encoding)
+    {
+        case ANTLR3_ENC_UTF32:
+        case ANTLR3_ENC_UTF32BE:
+        case ANTLR3_ENC_UTF32LE:
+
+            // No string construction functions exist for the 32 bit encodings.
+            //
+            break;
+
+        case ANTLR3_ENC_UTF16BE:
+        case ANTLR3_ENC_UTF16LE:
+        case ANTLR3_ENC_UTF16:
+
+            factory->newRaw = newRawUTF16;
+            factory->newSize = newSizeUTF16;
+            factory->newPtr = newPtrUTF16_UTF16;
+            factory->newPtr8 = newPtrUTF16_8;
+            factory->newStr = newStrUTF16_UTF16;
+            factory->newStr8 = newStrUTF16_8;
+            factory->printable = printableUTF16;
+            break;
+
+        case ANTLR3_ENC_UTF8:
+        case ANTLR3_ENC_EBCDIC:
+        case ANTLR3_ENC_8BIT:
+        default:
+
+            factory->newRaw = newRaw8;
+            factory->newSize = newSize8;
+            factory->newPtr = newPtr8;
+            factory->newPtr8 = newPtr8;
+            factory->newStr = newStr8;
+            factory->newStr8 = newStr8;
+            factory->printable = printable8;
+            break;
+    }
+    return factory;
+}
+
+
+/**
+ * Allocates an empty 8 bit string, wires up its API and registers it with the
+ * factory's tracking vector.
+ *
+ * \param factory The factory that will own the new string.
+ * \return The new string, or NULL if the structure could not be allocated.
+ */
+static pANTLR3_STRING
+newRaw8 (pANTLR3_STRING_FACTORY factory)
+{
+    pANTLR3_STRING fresh;
+
+    fresh = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
+
+    if (fresh != NULL)
+    {
+        /* Fill in the 8 bit API and remember who owns this string
+         */
+        stringInit8(fresh);
+        fresh->factory = factory;
+
+        /* Track the string at the next free slot, registering stringFree as
+         * the routine the vector uses to release it.
+         */
+        factory->strings->set(factory->strings, factory->index, (void *) fresh, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
+        fresh->index = factory->index++;
+    }
+
+    return fresh;
+}
+/**
+ * Allocates an empty UTF16 string, wires up its API and registers it with the
+ * factory's tracking vector.
+ *
+ * \param factory The factory that will own the new string.
+ * \return The new string, or NULL if the structure could not be allocated.
+ */
+static pANTLR3_STRING
+newRawUTF16 (pANTLR3_STRING_FACTORY factory)
+{
+    pANTLR3_STRING fresh;
+
+    fresh = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
+
+    if (fresh != NULL)
+    {
+        /* Fill in the UTF16 API and remember who owns this string
+         */
+        stringInitUTF16(fresh);
+        fresh->factory = factory;
+
+        /* Track the string at the next free slot, registering stringFree as
+         * the routine the vector uses to release it.
+         */
+        factory->strings->set(factory->strings, factory->index, (void *) fresh, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
+        fresh->index = factory->index++;
+    }
+
+    return fresh;
+}
+/**
+ * Releases a string: first its character buffer (when one was allocated),
+ * then the string structure itself. Registered with the factory's vector
+ * as the per-element free routine.
+ *
+ * \param string The string to release.
+ */
+static
+void ANTLR3_CDECL stringFree (pANTLR3_STRING string)
+{
+    if (string->chars != NULL)
+    {
+        ANTLR3_FREE(string->chars);
+    }
+
+    ANTLR3_FREE(string);
+}
+/**
+ * Resets a string structure to the empty state and installs the 8 bit
+ * implementations of the string API.
+ *
+ * \param string The string structure to initialize.
+ */
+static void
+stringInit8 (pANTLR3_STRING string)
+{
+    /* Empty content, tagged as 8 bit
+     */
+    string->chars = NULL;
+    string->len = 0;
+    string->size = 0;
+    string->encoding = ANTLR3_ENC_8BIT ;
+
+    /* Entry points that take raw character pointers; for an 8 bit string the
+     * "native" and the explicit 8 bit variants are the same functions.
+     */
+    string->set = set8;
+    string->set8 = set8;
+    string->append = append8;
+    string->append8 = append8;
+    string->insert = insert8;
+    string->insert8 = insert8;
+    string->compare = compare8;
+    string->compare8 = compare8;
+
+    /* Character and integer helpers, queries and conversions
+     */
+    string->addc = addc8;
+    string->addi = addi8;
+    string->inserti = inserti8;
+    string->charAt = charAt8;
+    string->subString = subString8;
+    string->toInt32 = toInt32_8;
+    string->to8 = to8_8;
+    string->toUTF8 = toUTF8_8;
+
+    /* Entry points that take another ANTLR3 string
+     */
+    string->setS = setS;
+    string->appendS = appendS;
+    string->insertS = insertS;
+    string->compareS = compareS;
+}
+/**
+ * Resets a string structure to the empty state and installs the UTF16
+ * implementations of the string API.
+ *
+ * \param string The string structure to initialize.
+ */
+static void
+stringInitUTF16 (pANTLR3_STRING string)
+{
+    string->len = 0;
+    string->size = 0;
+    string->chars = NULL;
+
+    /* The buffer of these strings holds 16 bit code units, so tag it as
+     * UTF16 (it was previously tagged ANTLR3_ENC_8BIT, a copy of the 8 bit
+     * initializer).
+     */
+    string->encoding = ANTLR3_ENC_UTF16;
+
+    /* API for UTF16 strings */
+
+    string->set = setUTF16_UTF16;
+    string->set8 = setUTF16_8;
+    string->append = appendUTF16_UTF16;
+    string->append8 = appendUTF16_8;
+    string->insert = insertUTF16_UTF16;
+    string->insert8 = insertUTF16_8;
+    string->addi = addiUTF16;
+    string->inserti = insertiUTF16;
+    string->addc = addcUTF16;
+    string->charAt = charAtUTF16;
+    string->compare = compareUTF16_UTF16;
+    string->compare8 = compareUTF16_8;
+    string->subString = subStringUTF16;
+    string->toInt32 = toInt32_UTF16;
+    string->to8 = to8_UTF16;
+    string->toUTF8 = toUTF8_UTF16;
+
+    /* Entry points that take another ANTLR3 string are encoding independent */
+
+    string->compareS = compareS;
+    string->setS = setS;
+    string->appendS = appendS;
+    string->insertS = insertS;
+}
+/**
+ * Placeholder initializer for UTF-8 strings: resets the string to the empty
+ * state but installs no API functions.
+ *
+ * \param string The string structure to initialize.
+ *
+ * TODO: Implement UTF-8
+ */
+static void
+stringInitUTF8 (pANTLR3_STRING string)
+{
+    string->chars = NULL;
+    string->size = 0;
+    string->len = 0;
+
+    /* No API is installed yet */
+}
+
+// "Converts" an 8 bit string to UTF8. Nothing is assumed about the 8 bit
+// encoding, so the result is simply a new factory string holding a copy of
+// the same len bytes.
+//
+static pANTLR3_STRING
+toUTF8_8 (pANTLR3_STRING string)
+{
+    pANTLR3_STRING_FACTORY owner;
+
+    owner = string->factory;
+
+    return owner->newPtr(owner, (pANTLR3_UINT8)(string->chars), string->len);
+}
+
+// Convert a UTF16 string into a UTF8 representation using the Unicode.org
+// supplied C algorithms, which are now contained within the ANTLR3 C runtime
+// as permitted by the Unicode license (within the source code antlr3convertutf.c/.h).
+// UCS2 has the same encoding as UTF16 so we can use the UTF16 converter.
+//
+// Returns a new factory string whose buffer holds the UTF8 bytes followed by a
+// '\0' and whose len is the number of UTF8 bytes produced, or NULL if the
+// string could not be created. If the output buffer cannot be allocated the
+// returned string is empty (chars == NULL, len == 0, size == 0).
+//
+static pANTLR3_STRING
+toUTF8_UTF16 (pANTLR3_STRING string)
+{
+
+    UTF8 * outputStart;
+    UTF8 * outputEnd;
+    UTF16 * inputEnd;
+    pANTLR3_STRING utf8String;
+
+    utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
+
+    if (utf8String == NULL)
+    {
+        return NULL;
+    }
+
+    // Free existing allocation
+    //
+    ANTLR3_FREE(utf8String->chars);
+    utf8String->len = 0;
+
+    // Reallocate according to maximum expected size: each 16 bit code unit
+    // expands to at most 3 UTF8 bytes (a surrogate pair is 2 units -> 4 bytes).
+    // One extra byte is reserved for the terminator.
+    //
+    utf8String->size = string->len *3;
+    utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
+
+    if (utf8String->chars == NULL)
+    {
+        // Do not advertise capacity that was never obtained
+        //
+        utf8String->size = 0;
+        return utf8String;
+    }
+
+    inputEnd = (UTF16 *) (string->chars);
+    outputStart = (UTF8 *) (utf8String->chars);
+    outputEnd = outputStart;
+
+    // Call the Unicode converter. The target end is exclusive, so passing
+    // start + size lets the converter fill size bytes and still leaves the
+    // reserved byte free for the terminator.
+    //
+    // We don't really care if things failed or not here, we just converted
+    // everything that was vaguely possible and stopped when it wasn't. It is
+    // up to the grammar programmer to verify that the input is sensible.
+    //
+    (void) ConvertUTF16toUTF8
+            (
+                (const UTF16**)&inputEnd,
+                ((const UTF16 *)(string->chars)) + string->len,
+                &outputEnd,
+                outputStart + utf8String->size,
+                lenientConversion
+            );
+
+    utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
+
+    // Always null terminate, directly after the last byte written. (Writing at
+    // outputEnd + 1 left an uninitialized byte at chars[len] and overran the
+    // buffer for an empty input.)
+    //
+    *outputEnd = '\0';
+
+    return utf8String;
+}
+
+/**
+ * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
+ *
+ * \param[in] factory - Pointer to the string factory that owns strings
+ * \param[in] size - In characters
+ * \return pointer to the new, empty string, or NULL if either the string
+ *         structure or its character buffer could not be allocated.
+ */
+static pANTLR3_STRING
+newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
+{
+    pANTLR3_STRING string;
+
+    string = factory->newRaw(factory);
+
+    if (string == NULL)
+    {
+        return string;
+    }
+
+    /* Always add one more byte for a terminator ;-)
+     */
+    string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
+
+    if (string->chars == NULL)
+    {
+        /* newRaw already registered the string with the factory, so hand it
+         * back through the factory rather than leaving a string without a
+         * buffer for callers to write through.
+         */
+        factory->destroy(factory, string);
+        return NULL;
+    }
+
+    *(string->chars) = '\0';
+    string->size = size + 1;
+
+    return string;
+}
+/**
+ * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
+ *
+ * \param[in] factory - Pointer to the string factory that owns strings
+ * \param[in] size - In characters (count double for surrogate pairs!!!)
+ * \return pointer to the new, empty string, or NULL if either the string
+ *         structure or its character buffer could not be allocated.
+ */
+static pANTLR3_STRING
+newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
+{
+    pANTLR3_STRING string;
+
+    string = factory->newRaw(factory);
+
+    if (string == NULL)
+    {
+        return string;
+    }
+
+    /* Always add one more character for a terminator ;-)
+     */
+    string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
+
+    if (string->chars == NULL)
+    {
+        /* newRaw already registered the string with the factory, so hand it
+         * back through the factory rather than leaving a string without a
+         * buffer for callers to write through.
+         */
+        factory->destroy(factory, string);
+        return NULL;
+    }
+
+    /* The terminator is a whole 16 bit unit, so clear both of its bytes
+     */
+    *((pANTLR3_UINT16)(string->chars)) = '\0';
+    string->size = size+1; /* Size is always in characters, as is len */
+
+    return string;
+}
+
+/** Creates a new 8 bit string initialized with the 8 bit characters at the
+ * supplied ptr, of pre-determined size.
+ * \param[in] factory - Pointer to the string factory that owns the strings
+ * \param[in] ptr - Pointer to 8 bit encoded characters; when NULL the new
+ *                  string is left empty
+ * \param[in] size - Number of characters to copy from ptr
+ * \return pointer to the new string, or NULL if it could not be created
+ */
+static pANTLR3_STRING
+newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
+{
+    pANTLR3_STRING result;
+
+    result = factory->newSize(factory, size);
+
+    if (result == NULL)
+    {
+        return NULL;
+    }
+
+    /* Only copy when there is something to copy and somewhere to copy it from
+     */
+    if (size != 0 && ptr != NULL)
+    {
+        ANTLR3_MEMMOVE(result->chars, (const void *)ptr, size);
+
+        /* Terminate, these strings are usually used for Token streams and printing etc.
+         */
+        result->chars[size] = '\0';
+        result->len = size;
+    }
+
+    return result;
+}
+
+/** Creates a new UTF16 string initialized with the 8 bit characters at the
+ * supplied 8 bit character ptr, of pre-determined size.
+ * \param[in] factory - Pointer to the string factory that owns the strings
+ * \param[in] ptr - Pointer to 8 bit encoded characters; when NULL the new
+ *                  string is left empty
+ * \param[in] size - Number of 8 bit characters to widen and copy
+ * \return pointer to the new string, or NULL if it could not be created
+ */
+static pANTLR3_STRING
+newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
+{
+    pANTLR3_STRING string;
+
+    /* newSize accepts size in characters, not bytes
+     */
+    string = factory->newSize(factory, size);
+
+    if (string == NULL)
+    {
+        return NULL;
+    }
+
+    if (size == 0)
+    {
+        return string;
+    }
+
+    if (ptr != NULL)
+    {
+        pANTLR3_UINT16 out;
+        ANTLR3_UINT32 i;
+
+        out = (pANTLR3_UINT16)(string->chars);
+
+        /* Widen each 8 bit character to a 16 bit unit. The counter is
+         * unsigned like size: a signed 32 bit counter went negative for
+         * sizes above INT32_MAX and skipped the copy while len was still set.
+         */
+        for (i = 0; i < size; i++)
+        {
+            out[i] = (ANTLR3_UINT16)(ptr[i]);
+        }
+
+        /* Terminate, these strings are usually used for Token streams and printing etc.
+         */
+        out[size] = '\0';
+
+        string->len = size;
+    }
+
+    return string;
+}
+
+/** Creates a new UTF16 string initialized with the UTF16 characters at the
+ * supplied ptr, of pre-determined size.
+ * \param[in] factory - Pointer to the string factory that owns the strings
+ * \param[in] ptr - Pointer to UTF16 encoded characters; when NULL the new
+ *                  string is left empty
+ * \param[in] size - Number of 16 bit characters to copy from ptr
+ * \return pointer to the new string, or NULL if it could not be created
+ */
+static pANTLR3_STRING
+newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
+{
+    pANTLR3_STRING result;
+
+    result = factory->newSize(factory, size);
+
+    if (result == NULL)
+    {
+        return NULL;
+    }
+
+    /* Only copy when there is something to copy and somewhere to copy it from
+     */
+    if (size != 0 && ptr != NULL)
+    {
+        pANTLR3_UINT16 units;
+
+        ANTLR3_MEMMOVE(result->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
+
+        /* Terminate, these strings are usually used for Token streams and printing etc.
+         */
+        units = (pANTLR3_UINT16)(result->chars);
+        units[size] = '\0';
+        result->len = size;
+    }
+
+    return result;
+}
+
+/** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
+ * \param[in] factory - Pointer to the string factory that owns strings.
+ * \param[in] ptr - Pointer to the 8 bit encoded string; NULL is treated as
+ *                  a zero length input
+ * \return Pointer to the newly initialized string
+ */
+static pANTLR3_STRING
+newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
+{
+    ANTLR3_UINT32 length;
+
+    /* strlen() must not be handed NULL; a zero length request is passed on
+     * instead and newPtr8 decides what to return for it.
+     */
+    length = (ptr == NULL) ? 0 : (ANTLR3_UINT32)strlen((const char *)ptr);
+
+    return factory->newPtr8(factory, ptr, length);
+}
+
+/** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
+ * \param[in] factory - Pointer to the string factory that owns strings.
+ * \param[in] ptr - Pointer to the 8 bit encoded string; NULL is treated as
+ *                  a zero length input
+ * \return Pointer to the newly initialized string
+ */
+static pANTLR3_STRING
+newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
+{
+    ANTLR3_UINT32 length;
+
+    /* strlen() must not be handed NULL; a zero length request is passed on
+     * instead and newPtr8 decides what to return for it.
+     */
+    length = (ptr == NULL) ? 0 : (ANTLR3_UINT32)strlen((const char *)ptr);
+
+    return factory->newPtr8(factory, ptr, length);
+}
+
+/** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
+ * \param[in] factory - Pointer to the string factory that owns strings.
+ * \param[in] ptr - Pointer to the UTF16 encoded string, terminated by a
+ *                  16 bit zero; NULL is treated as a zero length input
+ * \return Pointer to the newly initialized string
+ */
+static pANTLR3_STRING
+newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
+{
+    pANTLR3_UINT16 in;
+    ANTLR3_UINT32 count;
+
+    count = 0;
+
+    /* Determine the length of the input string in 16 bit units. A NULL
+     * input is never dereferenced; it is passed on as a zero length request.
+     */
+    if (ptr != NULL)
+    {
+        in = (pANTLR3_UINT16)ptr;
+
+        while (*in++ != '\0')
+        {
+            count++;
+        }
+    }
+    return factory->newPtr(factory, ptr, count);
+}
+
+/**
+ * Removes a single string from the factory's tracking vector and renumbers
+ * the strings that followed it so that each string's index again matches its
+ * slot in the vector.
+ *
+ * \param factory The factory that tracks the string.
+ * \param string  The string to remove.
+ */
+static void
+destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
+{
+    ANTLR3_UINT32 removedAt;
+    ANTLR3_UINT32 slot;
+
+    // Capture the slot up front; string is not touched again after the
+    // delete call. (Presumably del releases the element through the free
+    // routine registered when the string was created - confirm against the
+    // vector implementation.)
+    //
+    removedAt = string->index;
+
+    factory->strings->del(factory->strings, removedAt);
+
+    // One less string in the vector, so the next string allocated must be
+    // indexed one lower.
+    //
+    factory->index--;
+
+    // Renumber every entry from the vacated slot onwards. When the removed
+    // string was the last one the loop body simply never runs.
+    //
+    for (slot = removedAt; slot < factory->index; slot++)
+    {
+        ((pANTLR3_STRING)(factory->strings->elements[slot].element))->index = slot;
+    }
+}
+
+/**
+ * Builds a copy of an 8 bit string that is safe to print: newline becomes
+ * the two characters \n, carriage return becomes \r, and any other character
+ * for which isprint() is false becomes '?'.
+ *
+ * \param factory The factory used to create the result string.
+ * \param instr   The string to render.
+ * \return The new printable string, or NULL if it could not be allocated.
+ */
+static pANTLR3_STRING
+printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
+{
+    pANTLR3_STRING string;
+
+    /* We don't need to be too efficient here, this is mostly for error messages and so on.
+     */
+    pANTLR3_UINT8 scannedText;
+    ANTLR3_UINT32 i;
+
+    /* Assume we need as much as twice as much space to parse out the control characters
+     */
+    string = factory->newSize(factory, instr->len *2 + 1);
+
+    if (string == NULL)
+    {
+        /* Nothing to write into */
+        return NULL;
+    }
+
+    /* Scan through and replace unprintable (in terms of this routine)
+     * characters
+     */
+    scannedText = string->chars;
+
+    for (i = 0; i < instr->len; i++)
+    {
+        ANTLR3_UINT8 c;
+
+        c = *(instr->chars + i);
+
+        if (c == '\n')
+        {
+            *scannedText++ = '\\';
+            *scannedText++ = 'n';
+        }
+        else if (c == '\r')
+        {
+            *scannedText++ = '\\';
+            *scannedText++ = 'r';
+        }
+        else if (!isprint(c))
+        {
+            *scannedText++ = '?';
+        }
+        else
+        {
+            *scannedText++ = c;
+        }
+    }
+    *scannedText = '\0';
+
+    string->len = (ANTLR3_UINT32)(scannedText - string->chars);
+
+    return string;
+}
+
+/**
+ * Builds a copy of a UTF16 string that is safe to print: newline becomes
+ * the two characters \n, carriage return becomes \r, and any other character
+ * that is above 0xFF or for which isprint() is false becomes '?'.
+ *
+ * \param factory The factory used to create the result string.
+ * \param instr   The string to render; its buffer is read as 16 bit units.
+ * \return The new printable string, or NULL if it could not be allocated.
+ */
+static pANTLR3_STRING
+printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
+{
+    pANTLR3_STRING string;
+
+    /* We don't need to be too efficient here, this is mostly for error messages and so on.
+     */
+    pANTLR3_UINT16 scannedText;
+    pANTLR3_UINT16 inText;
+    ANTLR3_UINT32 i;
+    ANTLR3_UINT32 outLen;
+
+    /* Assume we need as much as twice as much space to parse out the control characters
+     */
+    string = factory->newSize(factory, instr->len *2 + 1);
+
+    if (string == NULL)
+    {
+        /* Nothing to write into */
+        return NULL;
+    }
+
+    /* Scan through and replace unprintable (in terms of this routine)
+     * characters
+     */
+    scannedText = (pANTLR3_UINT16)(string->chars);
+    inText = (pANTLR3_UINT16)(instr->chars);
+    outLen = 0;
+
+    for (i = 0; i < instr->len; i++)
+    {
+        ANTLR3_UINT16 c;
+
+        c = *(inText + i);
+
+        if (c == '\n')
+        {
+            *scannedText++ = '\\';
+            *scannedText++ = 'n';
+            outLen += 2;
+        }
+        else if (c == '\r')
+        {
+            *scannedText++ = '\\';
+            *scannedText++ = 'r';
+            outLen += 2;
+        }
+        else if (c > 0xFF || !isprint((int)c))
+        {
+            /* isprint() is only defined for values representable as an
+             * unsigned char (or EOF), so wider units are rejected before the
+             * call instead of being passed through to it.
+             */
+            *scannedText++ = '?';
+            outLen++;
+        }
+        else
+        {
+            *scannedText++ = c;
+            outLen++;
+        }
+    }
+    *scannedText = '\0';
+
+    string->len = outLen;
+
+    return string;
+}
+
+/** Releases a string factory: frees the vector that tracks the factory's
+ * strings, then frees the factory structure itself.
+ *
+ * \param factory The factory to release; it must not be used afterwards.
+ */
+static void
+closeFactory (pANTLR3_STRING_FACTORY factory)
+{
+    /* Free the tracking vector first. This is intended to deallocate every
+     * string still registered in it as well.
+     */
+    factory->strings->free(factory->strings);
+
+    /* Then the factory structure
+     */
+    ANTLR3_FREE(factory);
+}
+
+/**
+ * Appends a null terminated 8 bit string to an 8 bit string, growing the
+ * buffer when needed.
+ *
+ * \param string The string to extend.
+ * \param newbit The null terminated characters to append.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+append8 (pANTLR3_STRING string, const char * newbit)
+{
+    ANTLR3_UINT32 len;
+    ANTLR3_UINT32 needed;
+
+    len = (ANTLR3_UINT32)strlen(newbit);
+    needed = string->len + len + 1;
+
+    if (string->size < needed)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have
+         * worked, otherwise a failure would both leak and crash.
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)needed);
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = needed;
+    }
+
+    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
+     */
+    ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
+    string->len += len;
+
+    return string->chars;
+}
+
+/**
+ * Appends a null terminated 8 bit string to a UTF16 string, widening each
+ * byte to a 16 bit unit and growing the buffer when needed.
+ *
+ * \param string The UTF16 string to extend.
+ * \param newbit The null terminated 8 bit characters to append.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+appendUTF16_8 (pANTLR3_STRING string, const char * newbit)
+{
+    ANTLR3_UINT32 len;
+    ANTLR3_UINT32 needed;
+    pANTLR3_UINT16 apPoint;
+    ANTLR3_UINT32 count;
+
+    len = (ANTLR3_UINT32)strlen(newbit);
+    needed = string->len + len + 1;
+
+    if (string->size < needed)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*needed)));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = needed;
+    }
+
+    apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
+    string->len += len;
+
+    for (count = 0; count < len; count++)
+    {
+        /* Go through an unsigned byte so that values >= 0x80 are not sign
+         * extended to 0xFFxx where plain char is signed.
+         */
+        *apPoint++ = (ANTLR3_UINT16)(ANTLR3_UINT8)(*(newbit + count));
+    }
+    *apPoint = '\0';
+
+    return string->chars;
+}
+
+/**
+ * Appends a UTF16 string (terminated by a 16 bit zero) to a UTF16 string,
+ * growing the buffer when needed.
+ *
+ * \param string The UTF16 string to extend.
+ * \param newbit The 16 bit characters to append, passed as a byte pointer.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit)
+{
+    ANTLR3_UINT32 len;
+    ANTLR3_UINT32 needed;
+    pANTLR3_UINT16 in;
+
+    /** First, determine the length of the input string
+     */
+    in = (pANTLR3_UINT16)newbit;
+    len = 0;
+
+    while (*in++ != '\0')
+    {
+        len++;
+    }
+
+    needed = string->len + len + 1;
+
+    if (string->size < needed)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) * needed ));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = needed;
+    }
+
+    /* Note we copy one more character than the length in order to get the trailing delimiter
+     */
+    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
+    string->len += len;
+
+    return string->chars;
+}
+
+/**
+ * Replaces the content of an 8 bit string with a null terminated 8 bit
+ * string, growing the buffer when needed.
+ *
+ * \param string The string to overwrite.
+ * \param chars  The null terminated characters to store.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+set8 (pANTLR3_STRING string, const char * chars)
+{
+    ANTLR3_UINT32 len;
+
+    len = (ANTLR3_UINT32)strlen(chars);
+    if (string->size < len + 1)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = len + 1;
+    }
+
+    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
+     */
+    ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
+    string->len = len;
+
+    return string->chars;
+
+}
+
+/**
+ * Replaces the content of a UTF16 string with a null terminated 8 bit
+ * string, widening each byte to a 16 bit unit and growing the buffer when
+ * needed.
+ *
+ * \param string The UTF16 string to overwrite.
+ * \param chars  The null terminated 8 bit characters to store.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+setUTF16_8 (pANTLR3_STRING string, const char * chars)
+{
+    ANTLR3_UINT32 len;
+    ANTLR3_UINT32 count;
+    pANTLR3_UINT16 apPoint;
+
+    len = (ANTLR3_UINT32)strlen(chars);
+    if (string->size < len + 1)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = len + 1;
+    }
+    apPoint = ((pANTLR3_UINT16)string->chars);
+    string->len = len;
+
+    for (count = 0; count < string->len; count++)
+    {
+        /* Go through an unsigned byte so that values >= 0x80 are not sign
+         * extended to 0xFFxx where plain char is signed.
+         */
+        *apPoint++ = (ANTLR3_UINT16)(ANTLR3_UINT8)(*(chars + count));
+    }
+    *apPoint = '\0';
+
+    return string->chars;
+}
+
+/**
+ * Replaces the content of a UTF16 string with a UTF16 string terminated by
+ * a 16 bit zero, growing the buffer when needed.
+ *
+ * \param string The UTF16 string to overwrite.
+ * \param chars  The 16 bit characters to store, passed as a byte pointer.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+setUTF16_UTF16 (pANTLR3_STRING string, const char * chars)
+{
+    ANTLR3_UINT32 len;
+    pANTLR3_UINT16 in;
+
+    /** First, determine the length of the input string
+     */
+    in = (pANTLR3_UINT16)chars;
+    len = 0;
+
+    while (*in++ != '\0')
+    {
+        len++;
+    }
+
+    if (string->size < len + 1)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = len + 1;
+    }
+
+    /* Note we copy one more character than the length in order to get the trailing '\0'
+     */
+    ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
+    string->len = len;
+
+    return string->chars;
+
+}
+
+/**
+ * Appends a single character to an 8 bit string, growing the buffer when
+ * needed.
+ *
+ * \param string The string to extend.
+ * \param c      The character to append; it is truncated to 8 bits.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c)
+{
+    if (string->size < string->len + 2)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = string->len + 2;
+    }
+    *(string->chars + string->len) = (ANTLR3_UINT8)c;
+    *(string->chars + string->len + 1) = '\0';
+    string->len++;
+
+    return string->chars;
+}
+
+/**
+ * Appends a single character to a UTF16 string, growing the buffer when
+ * needed.
+ *
+ * \param string The UTF16 string to extend.
+ * \param c      The character to append; it is truncated to 16 bits.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c)
+{
+    pANTLR3_UINT16 ptr;
+
+    if (string->size < string->len + 2)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = string->len + 2;
+    }
+    ptr = (pANTLR3_UINT16)(string->chars);
+
+    *(ptr + string->len) = (ANTLR3_UINT16)c;
+    *(ptr + string->len + 1) = '\0';
+    string->len++;
+
+    return string->chars;
+}
+
+/**
+ * Appends the decimal text of a signed 32 bit integer to an 8 bit string.
+ *
+ * \param string The string to extend.
+ * \param i      The integer to format with "%d" and append.
+ * \return Whatever the string's append8 function returns.
+ */
+static pANTLR3_UINT8
+addi8 (pANTLR3_STRING string, ANTLR3_INT32 i)
+{
+    char digits[32];
+
+    sprintf(digits, "%d", i);
+
+    return string->append8(string, digits);
+}
+/**
+ * Appends the decimal text of a signed 32 bit integer to a UTF16 string.
+ * The digits are formatted as 8 bit text and handed to the string's append8.
+ *
+ * \param string The string to extend.
+ * \param i      The integer to format with "%d" and append.
+ * \return Whatever the string's append8 function returns.
+ */
+static pANTLR3_UINT8
+addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i)
+{
+    char digits[32];
+
+    sprintf(digits, "%d", i);
+
+    return string->append8(string, digits);
+}
+
+/**
+ * Inserts the decimal text of a signed 32 bit integer into an 8 bit string.
+ *
+ * \param string The string to modify.
+ * \param point  The character offset at which to insert.
+ * \param i      The integer to format with "%d" and insert.
+ * \return Whatever the string's insert8 function returns.
+ */
+static pANTLR3_UINT8
+inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
+{
+    char digits[32];
+
+    sprintf(digits, "%d", i);
+
+    return string->insert8(string, point, digits);
+}
+/**
+ * Inserts the decimal text of a signed 32 bit integer into a UTF16 string.
+ * The digits are formatted as 8 bit text and handed to the string's insert8.
+ *
+ * \param string The string to modify.
+ * \param point  The character offset at which to insert.
+ * \param i      The integer to format with "%d" and insert.
+ * \return Whatever the string's insert8 function returns.
+ */
+static pANTLR3_UINT8
+insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
+{
+    char digits[32];
+
+    sprintf(digits, "%d", i);
+
+    return string->insert8(string, point, digits);
+}
+
+/**
+ * Inserts a null terminated 8 bit string into an 8 bit string at the given
+ * offset, growing the buffer when needed. An offset at or beyond the current
+ * length is treated as an append.
+ *
+ * \param string The string to modify.
+ * \param point  The character offset at which to insert.
+ * \param newbit The null terminated characters to insert.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
+{
+    ANTLR3_UINT32 len;
+    ANTLR3_UINT32 needed;
+
+    if (point >= string->len)
+    {
+        return string->append(string, newbit);
+    }
+
+    len = (ANTLR3_UINT32)strlen(newbit);
+
+    if (len == 0)
+    {
+        return string->chars;
+    }
+
+    needed = string->len + len + 1;
+
+    if (string->size < needed)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)needed);
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = needed;
+    }
+
+    /* Move the characters we are inserting before, including the delimiter
+     */
+    ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
+
+    /* Note we copy the exact number of bytes
+     */
+    ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
+
+    string->len += len;
+
+    return string->chars;
+}
+
+/**
+ * Inserts a null terminated 8 bit string into a UTF16 string at the given
+ * offset, widening each byte to a 16 bit unit and growing the buffer when
+ * needed. An offset at or beyond the current length is treated as an append.
+ *
+ * \param string The UTF16 string to modify.
+ * \param point  The character offset at which to insert.
+ * \param newbit The null terminated 8 bit characters to insert.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
+{
+    ANTLR3_UINT32 len;
+    ANTLR3_UINT32 needed;
+    ANTLR3_UINT32 count;
+    pANTLR3_UINT16 inPoint;
+
+    if (point >= string->len)
+    {
+        return string->append8(string, newbit);
+    }
+
+    len = (ANTLR3_UINT32)strlen(newbit);
+
+    if (len == 0)
+    {
+        return string->chars;
+    }
+
+    needed = string->len + len + 1;
+
+    if (string->size < needed)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*needed));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = needed;
+    }
+
+    /* Move the characters we are inserting before, including the delimiter
+     */
+    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
+
+    string->len += len;
+
+    inPoint = ((pANTLR3_UINT16)(string->chars))+point;
+    for (count = 0; count<len; count++)
+    {
+        /* Go through an unsigned byte so that values >= 0x80 are not sign
+         * extended to 0xFFxx where plain char is signed.
+         */
+        *(inPoint + count) = (ANTLR3_UINT16)(ANTLR3_UINT8)(*(newbit+count));
+    }
+
+    return string->chars;
+}
+
+/**
+ * Inserts a UTF16 string (terminated by a 16 bit zero) into a UTF16 string
+ * at the given offset, growing the buffer when needed. An offset at or beyond
+ * the current length is treated as an append.
+ *
+ * \param string The UTF16 string to modify.
+ * \param point  The character offset at which to insert.
+ * \param newbit The 16 bit characters to insert, passed as a byte pointer.
+ * \return The (possibly relocated) character buffer, or NULL if the buffer
+ *         could not be grown, in which case string is left unchanged.
+ */
+static pANTLR3_UINT8
+insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
+{
+    ANTLR3_UINT32 len;
+    ANTLR3_UINT32 needed;
+    pANTLR3_UINT16 in;
+
+    if (point >= string->len)
+    {
+        return string->append(string, newbit);
+    }
+
+    /** First, determine the length of the input string
+     */
+    in = (pANTLR3_UINT16)newbit;
+    len = 0;
+
+    while (*in++ != '\0')
+    {
+        len++;
+    }
+
+    if (len == 0)
+    {
+        return string->chars;
+    }
+
+    needed = string->len + len + 1;
+
+    if (string->size < needed)
+    {
+        pANTLR3_UINT8 grown;
+
+        /* Keep the old pointer until the reallocation is known to have worked
+         */
+        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*needed));
+
+        if (grown == NULL)
+        {
+            return NULL;
+        }
+        string->chars = grown;
+        string->size = needed;
+    }
+
+    /* Move the characters we are inserting before, including the delimiter
+     */
+    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
+
+
+    /* Note we copy the exact number of characters
+     */
+    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
+
+    string->len += len;
+
+    return string->chars;
+}
+
+/* Sets the contents of string from another string object, by passing that
+ * object's character buffer to string->set() and returning its result.
+ */
+static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars)
+{
+    const char * source;
+
+    source = (const char *)(chars->chars);
+
+    return string->set(string, source);
+}
+
+/* Appends the contents of another string object to string.
+ *
+ * Only a usable source (non NULL, with a non zero length and size and a
+ * character buffer) is passed on to string->append(); for anything else the
+ * current character pointer is returned and nothing is changed.
+ */
+static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit)
+{
+    if (newbit != NULL && newbit->len != 0 && newbit->size != 0 && newbit->chars != NULL)
+    {
+        return string->append(string, (const char *)(newbit->chars));
+    }
+
+    /* Nothing to add
+     */
+    return string->chars;
+}
+
+/* Inserts the contents of another string object at index point, by passing
+ * that object's character buffer to string->insert() and returning its result.
+ */
+static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
+{
+    const char * source;
+
+    source = (const char *)(newbit->chars);
+
+    return string->insert(string, point, source);
+}
+
+/* Compares the text of an 8 bit string with the supplied 8 bit character
+ * string using strcmp(). The strcmp() result is returned converted to
+ * ANTLR3_UINT32, so zero means the two are equal.
+ */
+static ANTLR3_UINT32
+compare8 (pANTLR3_STRING string, const char * compStr)
+{
+    const char * ours;
+
+    ours = (const char *)(string->chars);
+
+    return strcmp(ours, compStr);
+}
+
+/* Function that compares the text of a UTF16 string with the supplied
+ * 8 bit, NUL terminated character string and returns a result a la strcmp():
+ * zero when the two are equal, otherwise the difference between the first
+ * pair of characters that differ (as an unsigned 32 bit value).
+ *
+ * Each byte of compStr is treated as an unsigned value in the range 0..255,
+ * so the result does not depend on whether plain char is signed.
+ */
+static ANTLR3_UINT32
+compareUTF16_8 (pANTLR3_STRING string, const char * compStr)
+{
+    pANTLR3_UINT16 ourString;
+    const unsigned char * theirString;
+    ANTLR3_UINT32 charDiff;
+
+    ourString = (pANTLR3_UINT16)(string->chars);
+    theirString = (const unsigned char *)compStr;
+
+    while ((*ourString != 0) && (*theirString != 0))
+    {
+        charDiff = (ANTLR3_UINT32)(*ourString) - (ANTLR3_UINT32)(*theirString);
+        if (charDiff != 0)
+        {
+            return charDiff;
+        }
+        ourString++;
+        theirString++;
+    }
+
+    /* At this point, one of the strings was terminated
+     */
+    return (ANTLR3_UINT32)(*ourString) - (ANTLR3_UINT32)(*theirString);
+}
+
+/* Compares the text of a UTF16 string with the supplied character string,
+ * which is read as 16 bit characters (the same encoding as the string
+ * itself), and returns a result a la strcmp(): zero when equal, otherwise
+ * the difference between the first pair of characters that differ.
+ */
+static ANTLR3_UINT32
+compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8)
+{
+    pANTLR3_UINT16 left;
+    pANTLR3_UINT16 right;
+    ANTLR3_UINT32 delta;
+
+    left = (pANTLR3_UINT16)(string->chars);
+    right = (pANTLR3_UINT16)(compStr8);
+
+    /* Walk both strings in step until either one reaches its terminator
+     */
+    for ( ; ((ANTLR3_UCHAR)(*left) != '\0') && ((ANTLR3_UCHAR)(*right) != '\0'); left++, right++)
+    {
+        delta = *left - *right;
+        if (delta != 0)
+        {
+            return delta;
+        }
+    }
+
+    /* One (or both) of the strings ended; the difference of the two current
+     * characters is zero only if both ended together
+     */
+    return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*left) - (ANTLR3_UCHAR)(*right));
+}
+
+/* Compares the text of a string with the text of another string object by
+ * passing that object's character buffer to string->compare(), and returns
+ * whatever that returns.
+ */
+static ANTLR3_UINT32
+compareS (pANTLR3_STRING string, pANTLR3_STRING compStr)
+{
+    const char * other;
+
+    other = (const char *)compStr->chars;
+
+    return string->compare(string, other);
+}
+
+
+/* Returns the character at the supplied offset of an 8 bit string as a
+ * 32 bit character. Any offset greater than the string length yields '\0'.
+ */
+static ANTLR3_UCHAR
+charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
+{
+    if (offset <= string->len)
+    {
+        return (ANTLR3_UCHAR)(string->chars[offset]);
+    }
+
+    return (ANTLR3_UCHAR)'\0';
+}
+
+/* Returns the 16 bit character at the supplied offset of a UTF16 string as
+ * a 32 bit character. Any offset greater than the string length yields '\0'.
+ */
+static ANTLR3_UCHAR
+charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset)
+{
+    pANTLR3_UINT16 wide;
+
+    if (offset <= string->len)
+    {
+        wide = (pANTLR3_UINT16)(string->chars);
+
+        return (ANTLR3_UCHAR)(wide[offset]);
+    }
+
+    return (ANTLR3_UCHAR)'\0';
+}
+
+/* Function that returns a substring of the supplied 8 bit string a la
+ * .subString(s,e) in java runtimes. The new string is created by the
+ * string's factory from (endIndex - startIndex) bytes starting at startIndex.
+ *
+ * An endIndex beyond the string length is clamped to length + 1, and a
+ * startIndex beyond the (clamped) endIndex is clamped to endIndex so that
+ * the unsigned length calculation cannot wrap around.
+ */
+static pANTLR3_STRING
+subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
+{
+    pANTLR3_STRING newStr;
+
+    /* NOTE(review): clamping to len + 1 (rather than len) means the
+     * terminator is counted as part of the substring - confirm this is intended.
+     */
+    if (endIndex > string->len)
+    {
+        endIndex = string->len + 1;
+    }
+
+    /* Without this, endIndex - startIndex would wrap to a huge unsigned value.
+     * NOTE(review): assumes newPtr() accepts a length of zero - confirm.
+     */
+    if (startIndex > endIndex)
+    {
+        startIndex = endIndex;
+    }
+
+    newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
+
+    return newStr;
+}
+
+/* Returns a substring of the supplied UTF16 string a la .subString(s,e)
+ * in java runtimes. The new string is created by the string's factory from
+ * (endIndex - startIndex) characters starting at 16 bit character index
+ * startIndex.
+ *
+ * An endIndex beyond the string length is clamped to length + 1, and a
+ * startIndex beyond the (clamped) endIndex is clamped to endIndex so that
+ * the unsigned length calculation cannot wrap around.
+ */
+static pANTLR3_STRING
+subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
+{
+    pANTLR3_STRING newStr;
+
+    /* NOTE(review): clamping to len + 1 (rather than len) means the
+     * terminator is counted as part of the substring - confirm this is intended.
+     */
+    if (endIndex > string->len)
+    {
+        endIndex = string->len + 1;
+    }
+
+    /* Without this, endIndex - startIndex would wrap to a huge unsigned value.
+     * NOTE(review): assumes newPtr() accepts a length of zero - confirm.
+     */
+    if (startIndex > endIndex)
+    {
+        startIndex = endIndex;
+    }
+
+    newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
+
+    return newStr;
+}
+
+/* Converts the characters of an 8 bit string to an integer by handing the
+ * character buffer to atoi().
+ */
+static ANTLR3_INT32
+toInt32_8 (struct ANTLR3_STRING_struct * string)
+{
+    const char * digits;
+
+    digits = (const char *)(string->chars);
+
+    return atoi(digits);
+}
+
+/* Function that can convert the characters in a UTF16 string to an integer.
+ *
+ * An optional leading '-' or '+' is accepted, followed by the decimal digits
+ * '0'..'9'; conversion stops at the first character that is not such a digit
+ * (including the terminator). A string with no digits yields 0.
+ *
+ * The value is accumulated in unsigned arithmetic, so a number too large for
+ * 32 bits wraps around instead of overflowing a signed integer.
+ */
+static ANTLR3_INT32
+toInt32_UTF16 (struct ANTLR3_STRING_struct * string)
+{
+    pANTLR3_UINT16 input;
+    ANTLR3_UINT32 magnitude;
+    ANTLR3_BOOLEAN negate;
+
+    magnitude = 0;
+    input = (pANTLR3_UINT16)(string->chars);
+    negate = ANTLR3_FALSE;
+
+    if (*input == (ANTLR3_UCHAR)'-')
+    {
+        negate = ANTLR3_TRUE;
+        input++;
+    }
+    else if (*input == (ANTLR3_UCHAR)'+')
+    {
+        input++;
+    }
+
+    /* Test the digit range directly: isdigit() must not be given a 16 bit
+     * value that is not representable as an unsigned char.
+     */
+    while (*input >= '0' && *input <= '9')
+    {
+        magnitude = (magnitude * 10) + ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
+        input++;
+    }
+
+    return negate ? (ANTLR3_INT32)(0 - magnitude) : (ANTLR3_INT32)magnitude;
+}
+
+/* Returns an 8 bit version of the string. The string is already in 8 bit
+ * encoding, so the very same string object is handed back; no copy is made.
+ */
+static pANTLR3_STRING to8_8 (pANTLR3_STRING string)
+{
+    pANTLR3_STRING same;
+
+    same = string;
+
+    return same;
+}
+
+/* Function that returns an 8 bit version of a UTF16 string as a new string
+ * obtained from newRaw8() on the same factory. Each UTF16 character is
+ * narrowed back into 8 bits, with characters that are too large (above 255)
+ * replaced with '_'.
+ *
+ * Returns the new string, or NULL if either the string object or its
+ * character buffer could not be allocated.
+ */
+static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string)
+{
+    pANTLR3_STRING newStr;
+    ANTLR3_UINT32 i;
+
+    /* Create a new 8 bit string
+     */
+    newStr = newRaw8(string->factory);
+
+    if (newStr == NULL)
+    {
+        return NULL;
+    }
+
+    /* Always add one more byte for a terminator
+     */
+    newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
+
+    if (newStr->chars == NULL)
+    {
+        /* No buffer: leave the new string empty rather than writing through
+         * a NULL pointer.
+         * NOTE(review): newStr itself is not released here because who owns a
+         * string produced by newRaw8() is not visible from this function - confirm.
+         */
+        newStr->size = 0;
+        newStr->len = 0;
+
+        return NULL;
+    }
+
+    newStr->size = string->len + 1;
+    newStr->len = string->len;
+
+    /* Now copy each UTF16 character, making it an 8 bit character of
+     * some sort.
+     */
+    for (i = 0; i < string->len; i++)
+    {
+        ANTLR3_UCHAR c;
+
+        c = *(((pANTLR3_UINT16)(string->chars)) + i);
+
+        *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
+    }
+
+    /* Terminate
+     */
+    *(newStr->chars + newStr->len) = '\0';
+
+    return newStr;
+}