diff options
Diffstat (limited to 'impl/antlr/libantlr3c-3.4/include/antlr3convertutf.h')
-rw-r--r-- | impl/antlr/libantlr3c-3.4/include/antlr3convertutf.h | 176 |
1 files changed, 0 insertions, 176 deletions
diff --git a/impl/antlr/libantlr3c-3.4/include/antlr3convertutf.h b/impl/antlr/libantlr3c-3.4/include/antlr3convertutf.h deleted file mode 100644 index 79cc82c..0000000 --- a/impl/antlr/libantlr3c-3.4/include/antlr3convertutf.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright 2001-2004 Unicode, Inc. - * - * Disclaimer - * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. - * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. - */ - -/* --------------------------------------------------------------------- - - Conversions between UTF32, UTF-16, and UTF-8. Header file. - - Several functions are included here, forming a complete set of - conversions between the three formats. UTF-7 is not included - here, but is handled in a separate source file. - - Each of these routines takes pointers to input buffers and output - buffers. The input buffers are const. - - Each routine converts the text between *sourceStart and sourceEnd, - putting the result into the buffer between *targetStart and - targetEnd. Note: the end pointers are *after* the last item: e.g. - *(sourceEnd - 1) is the last item. - - The return result indicates whether the conversion was successful, - and if not, whether the problem was in the source or target buffers. - (Only the first encountered problem is indicated.) - - After the conversion, *sourceStart and *targetStart are both - updated to point to the end of last text successfully converted in - the respective buffers. - - Input parameters: - sourceStart - pointer to a pointer to the source buffer. - The contents of this are modified on return so that - it points at the next thing to be converted. - targetStart - similarly, pointer to pointer to the target buffer. - sourceEnd, targetEnd - respectively pointers to the ends of the - two buffers, for overflow checking only. - - These conversion functions take a ConversionFlags argument. When this - flag is set to strict, both irregular sequences and isolated surrogates - will cause an error. When the flag is set to lenient, both irregular - sequences and isolated surrogates are converted. - - Whether the flag is strict or lenient, all illegal sequences will cause - an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>, - or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code - must check for illegal sequences. - - When the flag is set to lenient, characters over 0x10FFFF are converted - to the replacement character; otherwise (when the flag is set to strict) - they constitute an error. - - Output parameters: - The value "sourceIllegal" is returned from some routines if the input - sequence is malformed. When "sourceIllegal" is returned, the source - value will point to the illegal value that caused the problem. E.g., - in UTF-8 when a sequence is malformed, it points to the start of the - malformed sequence. - - Author: Mark E. Davis, 1994. - Rev History: Rick McGowan, fixes & updates May 2001. - Fixes & updates, Sept 2001. - ------------------------------------------------------------------------- */ - -/* --------------------------------------------------------------------- - The following 4 definitions are compiler-specific. - The C standard does not guarantee that wchar_t has at least - 16 bits, so wchar_t is no less portable than unsigned short! - All should be unsigned values to avoid sign extension during - bit mask & shift operations. ------------------------------------------------------------------------- */ - - -// Changes for ANTLR3 - Jim Idle, January 2008. -// builtin types defined for Unicode types changed to -// aliases for the types that are system determined by -// ANTLR at compile time. -// -// typedef unsigned long UTF32; /* at least 32 bits */ -// typedef unsigned short UTF16; /* at least 16 bits */ -// typedef unsigned char UTF8; /* typically 8 bits */ -// typedef unsigned char Boolean; /* 0 or 1 */ - -#ifndef _ANTLR3_CONVERTUTF_H -#define _ANTLR3_CONVERTUTF_H - -#include <antlr3defs.h> - -typedef ANTLR3_UINT32 UTF32; /* at least 32 bits */ -typedef ANTLR3_UINT16 UTF16; /* at least 16 bits */ -typedef ANTLR3_UINT8 UTF8; /* typically 8 bits */ - -/* Some fundamental constants */ -#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD -#define UNI_MAX_BMP (UTF32)0x0000FFFF -#define UNI_MAX_UTF16 (UTF32)0x0010FFFF -#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF -#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF - -#define UNI_SUR_HIGH_START (UTF32)0xD800 -#define UNI_SUR_HIGH_END (UTF32)0xDBFF -#define UNI_SUR_LOW_START (UTF32)0xDC00 -#define UNI_SUR_LOW_END (UTF32)0xDFFF -#define false ANTLR3_FALSE -#define true ANTLR3_TRUE -#define halfShift ((UTF32)10) -#define halfBase ((UTF32)0x0010000UL) -#define halfMask ((UTF32)0x3FFUL) - -typedef enum { - conversionOK, /* conversion successful */ - sourceExhausted, /* partial character in source, but hit end */ - targetExhausted, /* insuff. room in target for conversion */ - sourceIllegal /* source sequence is illegal/malformed */ -} ConversionResult; - -typedef enum { - strictConversion = 0, - lenientConversion -} ConversionFlags; - -/* This is for C++ and does no harm in C */ -#ifdef __cplusplus -extern "C" { -#endif - -ConversionResult ConvertUTF8toUTF16 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF16toUTF8 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF8toUTF32 ( - const UTF8** sourceStart, const UTF8* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF32toUTF8 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF16toUTF32 ( - const UTF16** sourceStart, const UTF16* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags); - -ConversionResult ConvertUTF32toUTF16 ( - const UTF32** sourceStart, const UTF32* sourceEnd, - UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags); - -ANTLR3_BOOLEAN isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); - -#ifdef __cplusplus -} -#endif - -#endif - -/* --------------------------------------------------------------------- */ |