From f9fc35785b53aa097a09ab1b865d33497ee1802e Mon Sep 17 00:00:00 2001 From: Carlo Zancanaro Date: Tue, 10 Jul 2012 13:01:48 +1000 Subject: Move antlr. Add `make test` to Makefile. --- antlr/libantlr3c-3.4/src/antlr3filestream.c | 474 ++++++++++++++++++++++++++++ 1 file changed, 474 insertions(+) create mode 100644 antlr/libantlr3c-3.4/src/antlr3filestream.c (limited to 'antlr/libantlr3c-3.4/src/antlr3filestream.c') diff --git a/antlr/libantlr3c-3.4/src/antlr3filestream.c b/antlr/libantlr3c-3.4/src/antlr3filestream.c new file mode 100644 index 0000000..4430ffe --- /dev/null +++ b/antlr/libantlr3c-3.4/src/antlr3filestream.c @@ -0,0 +1,474 @@ +/** \file + * \brief The ANTLR3 C filestream is used when the source character stream + * is a filesystem based input set and all the characters in the filestream + * can be loaded at once into memory and away the lexer goes. + * + * A number of initializers are provided in order that various character + * sets can be supported from input files. The ANTLR3 C runtime expects + * to deal with UTF32 characters only (the reasons for this are to + * do with the simplification of C code when using this form of Unicode + * encoding, though this is not a panacea. More information can be + * found on this by consulting: + * - http://www.unicode.org/versions/Unicode4.0.0/ch02.pdf#G11178 + * Where a well grounded discussion of the encoding formats available + * may be found. + * + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +static void setupInputStream (pANTLR3_INPUT_STREAM input); +static pANTLR3_INPUT_STREAM antlr3CreateFileStream (pANTLR3_UINT8 fileName); +static pANTLR3_INPUT_STREAM antlr3CreateStringStream (pANTLR3_UINT8 data); + +ANTLR3_API pANTLR3_INPUT_STREAM +antlr3FileStreamNew(pANTLR3_UINT8 fileName, ANTLR3_UINT32 encoding) +{ + pANTLR3_INPUT_STREAM input; + + // First order of business is to read the file into some buffer space + // as just straight 8 bit bytes. Then we will work out the encoding and + // byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateFileStream(fileName); + if (input == NULL) + { + return NULL; + } + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + input->encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + setupInputStream(input); + + // Now we can set up the file name + // + input->istream->streamName = input->strFactory->newStr8(input->strFactory, fileName); + input->fileName = input->istream->streamName; + + return input; +} + + +ANTLR3_API pANTLR3_INPUT_STREAM +antlr3StringStreamNew(pANTLR3_UINT8 data, ANTLR3_UINT32 encoding, ANTLR3_UINT32 size, pANTLR3_UINT8 name) +{ + pANTLR3_INPUT_STREAM input; + + // First order of business is to set up the stream and install the data pointer. + // Then we will work out the encoding and byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateStringStream(data); + if (input == NULL) + { + return NULL; + } + + // Size (in bytes) of the given 'string' + // + input->sizeBuf = size; + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + input->encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + setupInputStream(input); + + // Now we can set up the file name + // + input->istream->streamName = input->strFactory->newStr8(input->strFactory, name); + input->fileName = input->istream->streamName; + + return input; +} + + +/// Determine endianess of the input stream and install the +/// API required for the encoding in that format. +/// +static void +setupInputStream(pANTLR3_INPUT_STREAM input) +{ + ANTLR3_BOOLEAN isBigEndian; + + // Used to determine the endianness of the machine we are currently + // running on. + // + ANTLR3_UINT16 bomTest = 0xFEFF; + + // What endianess is the machine we are running on? If the incoming + // encoding endianess is the same as this machine's natural byte order + // then we can use more efficient API calls. + // + if (*((pANTLR3_UINT8)(&bomTest)) == 0xFE) + { + isBigEndian = ANTLR3_TRUE; + } + else + { + isBigEndian = ANTLR3_FALSE; + } + + // What encoding did the user tell us {s}he thought it was? I am going + // to get sick of the questions on antlr-interest, I know I am. + // + switch (input->encoding) + { + case ANTLR3_ENC_UTF8: + + // See if there is a BOM at the start of this UTF-8 sequence + // and just eat it if there is. Windows .TXT files have this for instance + // as it identifies UTF-8 even though it is of no consequence for byte order + // as UTF-8 does not have a byte order. + // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xEF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xBB + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xBF + ) + { + // The UTF8 BOM is present so skip it + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3); + } + + // Install the UTF8 input routines + // + antlr3UTF8SetupStream(input); + break; + + case ANTLR3_ENC_UTF16: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine (or it is really UCS2). If there is a BOM we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); + + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); + } + else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE + ) + { + // BOM present, indicates Little Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); + + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); + } + else + { + // No BOM present, assume local computer byte order + // + antlr3UTF16SetupStream(input, isBigEndian, isBigEndian); + } + break; + + case ANTLR3_ENC_UTF32: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine. If there is we determine if the encoding + // is the same as the natural order of this machine. + // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); + + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); + } + else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + ) + { + // BOM present, indicates Little Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); + + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); + } + else + { + // No BOM present, assume local computer byte order + // + antlr3UTF32SetupStream(input, isBigEndian, isBigEndian); + } + break; + + case ANTLR3_ENC_UTF16BE: + + // Encoding is definately Big Endian with no BOM + // + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); + break; + + case ANTLR3_ENC_UTF16LE: + + // Encoding is definately Little Endian with no BOM + // + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); + break; + + case ANTLR3_ENC_UTF32BE: + + // Encoding is definately Big Endian with no BOM + // + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); + break; + + case ANTLR3_ENC_UTF32LE: + + // Encoding is definately Little Endian with no BOM + // + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); + break; + + case ANTLR3_ENC_EBCDIC: + + // EBCDIC is basically the same as ASCII but with an on the + // fly translation to ASCII + // + antlr3EBCDICSetupStream(input); + break; + + case ANTLR3_ENC_8BIT: + default: + + // Standard 8bit/ASCII + // + antlr38BitSetupStream(input); + break; + } +} + +/** \brief Use the contents of an operating system file as the input + * for an input stream. + * + * \param fileName Name of operating system file to read. + * \return + * - Pointer to new input stream context upon success + * - One of the ANTLR3_ERR_ defines on error. + */ +static pANTLR3_INPUT_STREAM +antlr3CreateFileStream(pANTLR3_UINT8 fileName) +{ + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + ANTLR3_UINT32 status; + + if (fileName == NULL) + { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM) + ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); + + if (input == NULL) + { + return NULL; + } + + // Structure was allocated correctly, now we can read the file. + // + status = antlr3read8Bit(input, fileName); + + // Call the common 8 bit input stream handler + // initialization. + // + antlr3GenericSetupStream(input); + + // However if the file was not there or something then we + // need to close. Have to wait until here as we cannot call + // close until the API is installed of course. + // + if (status != ANTLR3_SUCCESS) + { + input->close(input); + return NULL; + } + + return input; +} + +ANTLR3_API ANTLR3_UINT32 +antlr3read8Bit(pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 fileName) +{ + ANTLR3_FDSC infile; + ANTLR3_UINT32 fSize; + + /* Open the OS file in read binary mode + */ + infile = antlr3Fopen(fileName, "rb"); + + /* Check that it was there + */ + if (infile == NULL) + { + return (ANTLR3_UINT32)ANTLR3_ERR_NOFILE; + } + + /* It was there, so we can read the bytes now + */ + fSize = antlr3Fsize(fileName); /* Size of input file */ + + /* Allocate buffer for this input set + */ + input->data = ANTLR3_MALLOC((size_t)fSize); + input->sizeBuf = fSize; + + if (input->data == NULL) + { + return (ANTLR3_UINT32)ANTLR3_ERR_NOMEM; + } + + input->isAllocated = ANTLR3_TRUE; + + /* Now we read the file. Characters are not converted to + * the internal ANTLR encoding until they are read from the buffer + */ + antlr3Fread(infile, fSize, input->data); + + /* And close the file handle + */ + antlr3Fclose(infile); + + return ANTLR3_SUCCESS; +} + +/** \brief Open an operating system file and return the descriptor + * We just use the common open() and related functions here. + * Later we might find better ways on systems + * such as Windows and OpenVMS for instance. But the idea is to read the + * while file at once anyway, so it may be irrelevant. + */ +ANTLR3_API ANTLR3_FDSC +antlr3Fopen(pANTLR3_UINT8 filename, const char * mode) +{ + return (ANTLR3_FDSC)fopen((const char *)filename, mode); +} + +/** \brief Close an operating system file and free any handles + * etc. + */ +ANTLR3_API void +antlr3Fclose(ANTLR3_FDSC fd) +{ + fclose(fd); +} +ANTLR3_API ANTLR3_UINT32 +antlr3Fsize(pANTLR3_UINT8 fileName) +{ + struct _stat statbuf; + + _stat((const char *)fileName, &statbuf); + + return (ANTLR3_UINT32)statbuf.st_size; +} + +ANTLR3_API ANTLR3_UINT32 +antlr3Fread(ANTLR3_FDSC fdsc, ANTLR3_UINT32 count, void * data) +{ + return (ANTLR3_UINT32)fread(data, (size_t)count, 1, fdsc); +} + + +/** \brief Use the supplied 'string' as input to the stream + * + * \param data Pointer to the input data + * \return + * - Pointer to new input stream context upon success + * - NULL defines on error. + */ +static pANTLR3_INPUT_STREAM +antlr3CreateStringStream(pANTLR3_UINT8 data) +{ + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + + if (data == NULL) + { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM) + ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); + + if (input == NULL) + { + return NULL; + } + + // Structure was allocated correctly, now we can install the pointer + // + input->data = data; + input->isAllocated = ANTLR3_FALSE; + + // Call the common 8 bit input stream handler + // initialization. + // + antlr3GenericSetupStream(input); + + return input; +} \ No newline at end of file -- cgit v1.2.3