diff options
Diffstat (limited to 'impl/antlr/libantlr3c-3.4/src')
23 files changed, 18954 insertions, 0 deletions
diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3baserecognizer.c b/impl/antlr/libantlr3c-3.4/src/antlr3baserecognizer.c new file mode 100644 index 0000000..e2eccc6 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3baserecognizer.c @@ -0,0 +1,2235 @@ +/** \file + * Contains the base functions that all recognizers require. + * Any function can be overridden by a lexer/parser/tree parser or by the + * ANTLR3 programmer. + * + * \addtogroup pANTLR3_BASE_RECOGNIZER + * @{ + */ +#include <antlr3baserecognizer.h> + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +/* Interface functions -standard implementations cover parser and treeparser + * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides + * most of these functions. + */ +static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer); +static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer); +static void endResync (pANTLR3_BASE_RECOGNIZER recognizer); +static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level); +static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful); + +static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); +static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer); +static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); +static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype); +static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow); +static void reportError (pANTLR3_BASE_RECOGNIZER recognizer); +static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer); +static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact); +static 
void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames); +static void recover (pANTLR3_BASE_RECOGNIZER recognizer); +static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); +static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow); +static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow); +static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType); +static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set); +static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer); +static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name); +static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE); +static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart); +static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex); +static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart); +static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx)); +static void reset (pANTLR3_BASE_RECOGNIZER recognizer); +static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer); +static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); +static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, + ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); +static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer); + +ANTLR3_API pANTLR3_BASE_RECOGNIZER +antlr3BaseRecognizerNew(ANTLR3_UINT32 type, 
ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) +{ + pANTLR3_BASE_RECOGNIZER recognizer; + + // Allocate memory for the structure + // + recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER)); + + if (recognizer == NULL) + { + // Allocation failed + // + return NULL; + } + + + // If we have been supplied with a pre-existing recognizer state + // then we just install it, otherwise we must create one from scratch + // + if (state == NULL) + { + recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE)); + + if (recognizer->state == NULL) + { + ANTLR3_FREE(recognizer); + return NULL; + } + + // Initialize any new recognizer state + // + recognizer->state->errorRecovery = ANTLR3_FALSE; + recognizer->state->lastErrorIndex = -1; + recognizer->state->failed = ANTLR3_FALSE; + recognizer->state->errorCount = 0; + recognizer->state->backtracking = 0; + recognizer->state->following = NULL; + recognizer->state->ruleMemo = NULL; + recognizer->state->tokenNames = NULL; + recognizer->state->sizeHint = sizeHint; + recognizer->state->tokSource = NULL; + recognizer->state->tokFactory = NULL; + + // Rather than check to see if we must initialize + // the stack every time we are asked for an new rewrite stream + // we just always create an empty stack and then just + // free it when the base recognizer is freed. + // + recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size. + + if (recognizer->state->rStreams == NULL) + { + // Out of memory + // + ANTLR3_FREE(recognizer->state); + ANTLR3_FREE(recognizer); + return NULL; + } + } + else + { + // Install the one we were given, and do not reset it here + // as it will either already have been initialized or will + // be in a state that needs to be preserved. 
+ // + recognizer->state = state; + } + + // Install the BR API + // + recognizer->alreadyParsedRule = alreadyParsedRule; + recognizer->beginResync = beginResync; + recognizer->combineFollows = combineFollows; + recognizer->beginBacktrack = beginBacktrack; + recognizer->endBacktrack = endBacktrack; + recognizer->computeCSRuleFollow = computeCSRuleFollow; + recognizer->computeErrorRecoverySet = computeErrorRecoverySet; + recognizer->consumeUntil = consumeUntil; + recognizer->consumeUntilSet = consumeUntilSet; + recognizer->displayRecognitionError = displayRecognitionError; + recognizer->endResync = endResync; + recognizer->exConstruct = antlr3MTExceptionNew; + recognizer->getRuleInvocationStack = getRuleInvocationStack; + recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed; + recognizer->getRuleMemoization = getRuleMemoization; + recognizer->match = match; + recognizer->matchAny = matchAny; + recognizer->memoize = memoize; + recognizer->mismatch = mismatch; + recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken; + recognizer->mismatchIsMissingToken = mismatchIsMissingToken; + recognizer->recover = recover; + recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement; + recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet; + recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken; + recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors; + recognizer->reportError = reportError; + recognizer->reset = reset; + recognizer->synpred = synpred; + recognizer->toStrings = toStrings; + recognizer->getCurrentInputSymbol = getCurrentInputSymbol; + recognizer->getMissingSymbol = getMissingSymbol; + recognizer->debugger = NULL; + + recognizer->free = freeBR; + + /* Initialize variables + */ + recognizer->type = type; + + + return recognizer; +} +static void +freeBR (pANTLR3_BASE_RECOGNIZER recognizer) +{ + pANTLR3_EXCEPTION thisE; + + // Did we have a state allocated? 
+ // + if (recognizer->state != NULL) + { + // Free any rule memoization we set up + // + if (recognizer->state->ruleMemo != NULL) + { + recognizer->state->ruleMemo->free(recognizer->state->ruleMemo); + recognizer->state->ruleMemo = NULL; + } + + // Free any exception space we have left around + // + thisE = recognizer->state->exception; + if (thisE != NULL) + { + thisE->freeEx(thisE); + } + + // Free any rewrite streams we have allocated + // + if (recognizer->state->rStreams != NULL) + { + recognizer->state->rStreams->free(recognizer->state->rStreams); + } + + // Free up any token factory we created (error recovery for instance) + // + if (recognizer->state->tokFactory != NULL) + { + recognizer->state->tokFactory->close(recognizer->state->tokFactory); + } + // Free the shared state memory + // + ANTLR3_FREE(recognizer->state); + } + + // Free the actual recognizer space + // + ANTLR3_FREE(recognizer); +} + +/** + * Creates a new Mismatched Token Exception and inserts in the recognizer + * exception stack. 
+ * + * \param recognizer + * Context pointer for this recognizer + * + */ +ANTLR3_API void +antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer) +{ + /* Create a basic recognition exception structure + */ + antlr3RecognitionExceptionNew(recognizer); + + /* Now update it to indicate this is a Mismatched token exception + */ + recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME; + recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION; + + return; +} + +ANTLR3_API void +antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer) +{ + pANTLR3_EXCEPTION ex; + pANTLR3_LEXER lexer; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + + pANTLR3_INPUT_STREAM ins; + pANTLR3_INT_STREAM is; + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TREE_NODE_STREAM tns; + + ins = NULL; + cts = NULL; + tns = NULL; + is = NULL; + lexer = NULL; + parser = NULL; + tparser = NULL; + + switch (recognizer->type) + { + case ANTLR3_TYPE_LEXER: + + lexer = (pANTLR3_LEXER) (recognizer->super); + ins = lexer->input; + is = ins->istream; + + break; + + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super); + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + tns = tparser->ctnstream->tnstream; + is = tns->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n"); + return; + + break; + } + + /* Create a basic exception structure + */ + ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION, + (void *)ANTLR3_RECOGNITION_EX_NAME, + NULL, + ANTLR3_FALSE); + + /* Rest of information depends on the base type of the + * input stream. 
+ */ + switch (is->type & ANTLR3_INPUT_MASK) + { + case ANTLR3_CHARSTREAM: + + ex->c = is->_LA (is, 1); /* Current input character */ + ex->line = ins->getLine (ins); /* Line number comes from stream */ + ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */ + ex->index = is->index (is); + ex->streamName = ins->fileName; + ex->message = "Unexpected character"; + break; + + case ANTLR3_TOKENSTREAM: + + ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */ + ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine (ex->token); + ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine (ex->token); + ex->index = cts->tstream->istream->index (cts->tstream->istream); + if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF) + { + ex->streamName = NULL; + } + else + { + ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName; + } + ex->message = "Unexpected token"; + break; + + case ANTLR3_COMMONTREENODE: + + ex->token = tns->_LT (tns, 1); /* Current input tree node */ + ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine (ex->token); + ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine (ex->token); + ex->index = tns->istream->index (tns->istream); + + // Are you ready for this? Deep breath now... 
+ // + { + pANTLR3_COMMON_TREE tnode; + + tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super)); + + if (tnode->token == NULL) + { + ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-"); + } + else + { + if (tnode->token->input == NULL) + { + ex->streamName = NULL; + } + else + { + ex->streamName = tnode->token->input->fileName; + } + } + ex->message = "Unexpected node"; + } + break; + } + + ex->input = is; + ex->nextException = recognizer->state->exception; /* So we don't leak the memory */ + recognizer->state->exception = ex; + recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */ + + return; +} + + +/// Match current input symbol against ttype. Upon error, do one token +/// insertion or deletion if possible. +/// To turn off single token insertion or deletion error +/// recovery, override mismatchRecover() and have it call +/// plain mismatch(), which does not recover. Then any error +/// in a rule will cause an exception and immediate exit from +/// rule. Rule would recover by resynchronizing to the set of +/// symbols that can follow rule ref. 
+/// +static void * +match( pANTLR3_BASE_RECOGNIZER recognizer, + ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) +{ + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + void * matchedSymbol; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n"); + return ANTLR3_FALSE; + + break; + } + + // Pick up the current input token/node for assignment to labels + // + matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is); + + if (is->_LA(is, 1) == ttype) + { + // The token was the one we were told to expect + // + is->consume(is); // Consume that token from the stream + recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were) + recognizer->state->failed = ANTLR3_FALSE; // The match was a success + return matchedSymbol; // We are done + } + + // We did not find the expected token type, if we are backtracking then + // we just set the failed flag and return. + // + if (recognizer->state->backtracking > 0) + { + // Backtracking is going on + // + recognizer->state->failed = ANTLR3_TRUE; + return matchedSymbol; + } + + // We did not find the expected token and there is no backtracking + // going on, so we mismatch, which creates an exception in the recognizer exception + // stack. + // + matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow); + return matchedSymbol; +} + +/// Consumes the next token, whatever it is, and resets the recognizer state +/// so that it is not in error. 
+/// +/// \param recognizer +/// Recognizer context pointer +/// +static void +matchAny(pANTLR3_BASE_RECOGNIZER recognizer) +{ + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n"); + return; + + break; + } + recognizer->state->errorRecovery = ANTLR3_FALSE; + recognizer->state->failed = ANTLR3_FALSE; + is->consume(is); + + return; +} +/// +/// +static ANTLR3_BOOLEAN +mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype) +{ + ANTLR3_UINT32 nextt; + + nextt = is->_LA(is, 2); + + if (nextt == ttype) + { + if (recognizer->state->exception != NULL) + { + recognizer->state->exception->expecting = nextt; + } + return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted + } + else + { + return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted + } +} + +/// +/// +static ANTLR3_BOOLEAN +mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow) +{ + ANTLR3_BOOLEAN retcode; + pANTLR3_BITSET followClone; + pANTLR3_BITSET viableTokensFollowingThisRule; + + if (follow == NULL) + { + // There is no information about the tokens that can follow the last one + // hence we must say that the current one we found is not a member of the + // follow set and does not indicate a missing token. We will just consume this + // single token and see if the parser works it out from there. 
+ // + return ANTLR3_FALSE; + } + + followClone = NULL; + viableTokensFollowingThisRule = NULL; + + // The C bitset maps are laid down at compile time by the + // C code generation. Hence we cannot remove things from them + // and so on. So, in order to remove EOR (if we need to) then + // we clone the static bitset. + // + followClone = antlr3BitsetLoad(follow); + if (followClone == NULL) + { + return ANTLR3_FALSE; + } + + // Compute what can follow this grammar reference + // + if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)) + { + // EOR can follow, but if we are not the start symbol, we + // need to remove it. + // + if (recognizer->state->following->vector->count >= 0) + { + followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE); + } + + // Now compute the visiable tokens that can follow this rule, according to context + // and make them part of the follow set. + // + viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer); + followClone->borInPlace(followClone, viableTokensFollowingThisRule); + } + + /// if current token is consistent with what could come after set + /// then we know we're missing a token; error recovery is free to + /// "insert" the missing token + /// + /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR + /// in follow set to indicate that the fall of the start symbol is + /// in the set (EOF can follow). + /// + if ( followClone->isMember(followClone, is->_LA(is, 1)) + || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE) + ) + { + retcode = ANTLR3_TRUE; + } + else + { + retcode = ANTLR3_FALSE; + } + + if (viableTokensFollowingThisRule != NULL) + { + viableTokensFollowingThisRule->free(viableTokensFollowingThisRule); + } + if (followClone != NULL) + { + followClone->free(followClone); + } + + return retcode; + +} + +/// Factor out what to do upon token mismatch so tree parsers can behave +/// differently. 
Override and call mismatchRecover(input, ttype, follow) +/// to get single token insertion and deletion. Use this to turn off +/// single token insertion and deletion. Override mismatchRecover +/// to call this instead. +/// +/// \remark mismatch only works for parsers and must be overridden for anything else. +/// +static void +mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) +{ + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + // Install a mismatched token exception in the exception stack + // + antlr3MTExceptionNew(recognizer); + recognizer->state->exception->expecting = ttype; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n"); + return; + + break; + } + + if (mismatchIsUnwantedToken(recognizer, is, ttype)) + { + // Create a basic recognition exception structure + // + antlr3RecognitionExceptionNew(recognizer); + + // Now update it to indicate this is an unwanted token exception + // + recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME; + recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION; + + return; + } + + if (mismatchIsMissingToken(recognizer, is, follow)) + { + // Create a basic recognition exception structure + // + antlr3RecognitionExceptionNew(recognizer); + + // Now update it to indicate this is an unwanted token exception + // + recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME; + recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; + + return; + } + + // Just a mismatched token is all we can dtermine + // + antlr3MTExceptionNew(recognizer); + + return; +} +/// Report a recognition problem. 
+/// +/// This method sets errorRecovery to indicate the parser is recovering +/// not parsing. Once in recovery mode, no errors are generated. +/// To get out of recovery mode, the parser must successfully match +/// a token (after a resync). So it will go: +/// +/// 1. error occurs +/// 2. enter recovery mode, report error +/// 3. consume until token found in resynch set +/// 4. try to resume parsing +/// 5. next match() will reset errorRecovery mode +/// +/// If you override, make sure to update errorCount if you care about that. +/// +static void +reportError (pANTLR3_BASE_RECOGNIZER recognizer) +{ + // Invoke the debugger event if there is a debugger listening to us + // + if (recognizer->debugger != NULL) + { + recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception); + } + + if (recognizer->state->errorRecovery == ANTLR3_TRUE) + { + // Already in error recovery so don't display another error while doing so + // + return; + } + + // Signal we are in error recovery now + // + recognizer->state->errorRecovery = ANTLR3_TRUE; + + // Indicate this recognizer had an error while processing. 
+ // + recognizer->state->errorCount++; + + // Call the error display routine + // + recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames); +} + +static void +beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level) +{ + if (recognizer->debugger != NULL) + { + recognizer->debugger->beginBacktrack(recognizer->debugger, level); + } +} + +static void +endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful) +{ + if (recognizer->debugger != NULL) + { + recognizer->debugger->endBacktrack(recognizer->debugger, level, successful); + } +} +static void +beginResync (pANTLR3_BASE_RECOGNIZER recognizer) +{ + if (recognizer->debugger != NULL) + { + recognizer->debugger->beginResync(recognizer->debugger); + } +} + +static void +endResync (pANTLR3_BASE_RECOGNIZER recognizer) +{ + if (recognizer->debugger != NULL) + { + recognizer->debugger->endResync(recognizer->debugger); + } +} + +/// Compute the error recovery set for the current rule. +/// Documentation below is from the Java implementation. +/// +/// During rule invocation, the parser pushes the set of tokens that can +/// follow that rule reference on the stack; this amounts to +/// computing FIRST of what follows the rule reference in the +/// enclosing rule. This local follow set only includes tokens +/// from within the rule; i.e., the FIRST computation done by +/// ANTLR stops at the end of a rule. +// +/// EXAMPLE +// +/// When you find a "no viable alt exception", the input is not +/// consistent with any of the alternatives for rule r. The best +/// thing to do is to consume tokens until you see something that +/// can legally follow a call to r *or* any rule that called r. +/// You don't want the exact set of viable next tokens because the +/// input might just be missing a token--you might consume the +/// rest of the input looking for one of the missing tokens. 
+/// +/// Consider grammar: +/// +/// a : '[' b ']' +/// | '(' b ')' +/// ; +/// b : c '^' INT ; +/// c : ID +/// | INT +/// ; +/// +/// At each rule invocation, the set of tokens that could follow +/// that rule is pushed on a stack. Here are the various "local" +/// follow sets: +/// +/// FOLLOW(b1_in_a) = FIRST(']') = ']' +/// FOLLOW(b2_in_a) = FIRST(')') = ')' +/// FOLLOW(c_in_b) = FIRST('^') = '^' +/// +/// Upon erroneous input "[]", the call chain is +/// +/// a -> b -> c +/// +/// and, hence, the follow context stack is: +/// +/// depth local follow set after call to rule +/// 0 <EOF> a (from main()) +/// 1 ']' b +/// 3 '^' c +/// +/// Notice that ')' is not included, because b would have to have +/// been called from a different context in rule a for ')' to be +/// included. +/// +/// For error recovery, we cannot consider FOLLOW(c) +/// (context-sensitive or otherwise). We need the combined set of +/// all context-sensitive FOLLOW sets--the set of all tokens that +/// could follow any reference in the call chain. We need to +/// resync to one of those tokens. Note that FOLLOW(c)='^' and if +/// we resync'd to that token, we'd consume until EOF. We need to +/// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. +/// In this case, for input "[]", LA(1) is in this set so we would +/// not consume anything and after printing an error rule c would +/// return normally. It would not find the required '^' though. +/// At this point, it gets a mismatched token error and throws an +/// exception (since LA(1) is not in the viable following token +/// set). The rule exception handler tries to recover, but finds +/// the same recovery set and doesn't consume anything. Rule b +/// exits normally returning to rule a. Now it finds the ']' (and +/// with the successful match exits errorRecovery mode). +/// +/// So, you can see that the parser walks up call chain looking +/// for the token that was a member of the recovery set. 
+/// +/// Errors are not generated in errorRecovery mode. +/// +/// ANTLR's error recovery mechanism is based upon original ideas: +/// +/// "Algorithms + Data Structures = Programs" by Niklaus Wirth +/// +/// and +/// +/// "A note on error recovery in recursive descent parsers": +/// http://portal.acm.org/citation.cfm?id=947902.947905 +/// +/// Later, Josef Grosch had some good ideas: +/// +/// "Efficient and Comfortable Error Recovery in Recursive Descent +/// Parsers": +/// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip +/// +/// Like Grosch I implemented local FOLLOW sets that are combined +/// at run-time upon error to avoid overhead during parsing. +/// +static pANTLR3_BITSET +computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer) +{ + return recognizer->combineFollows(recognizer, ANTLR3_FALSE); +} + +/// Compute the context-sensitive FOLLOW set for current rule. +/// Documentation below is from the Java runtime. +/// +/// This is the set of token types that can follow a specific rule +/// reference given a specific call chain. You get the set of +/// viable tokens that can possibly come next (look ahead depth 1) +/// given the current call chain. Contrast this with the +/// definition of plain FOLLOW for rule r: +/// +/// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} +/// +/// where x in T* and alpha, beta in V*; T is set of terminals and +/// V is the set of terminals and non terminals. In other words, +/// FOLLOW(r) is the set of all tokens that can possibly follow +/// references to r in///any* sentential form (context). At +/// runtime, however, we know precisely which context applies as +/// we have the call chain. We may compute the exact (rather +/// than covering superset) set of following tokens. +/// +/// For example, consider grammar: +/// +/// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} +/// | "return" expr '.' 
+/// ; +/// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} +/// atom : INT // FOLLOW(atom)=={'+',')',';','.'} +/// | '(' expr ')' +/// ; +/// +/// The FOLLOW sets are all inclusive whereas context-sensitive +/// FOLLOW sets are precisely what could follow a rule reference. +/// For input input "i=(3);", here is the derivation: +/// +/// stat => ID '=' expr ';' +/// => ID '=' atom ('+' atom)* ';' +/// => ID '=' '(' expr ')' ('+' atom)* ';' +/// => ID '=' '(' atom ')' ('+' atom)* ';' +/// => ID '=' '(' INT ')' ('+' atom)* ';' +/// => ID '=' '(' INT ')' ';' +/// +/// At the "3" token, you'd have a call chain of +/// +/// stat -> expr -> atom -> expr -> atom +/// +/// What can follow that specific nested ref to atom? Exactly ')' +/// as you can see by looking at the derivation of this specific +/// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. +/// +/// You want the exact viable token set when recovering from a +/// token mismatch. Upon token mismatch, if LA(1) is member of +/// the viable next token set, then you know there is most likely +/// a missing token in the input stream. "Insert" one by just not +/// throwing an exception. +/// +static pANTLR3_BITSET +computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer) +{ + return recognizer->combineFollows(recognizer, ANTLR3_FALSE); +} + +/// Compute the current followset for the input stream. 
+/// +static pANTLR3_BITSET +combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact) +{ + pANTLR3_BITSET followSet; + pANTLR3_BITSET localFollowSet; + ANTLR3_UINT32 top; + ANTLR3_UINT32 i; + + top = recognizer->state->following->size(recognizer->state->following); + + followSet = antlr3BitsetNew(0); + localFollowSet = NULL; + + for (i = top; i>0; i--) + { + localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1)); + + if (localFollowSet != NULL) + { + followSet->borInPlace(followSet, localFollowSet); + + if (exact == ANTLR3_TRUE) + { + if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE) + { + // Only leave EOR in the set if at top (start rule); this lets us know + // if we have to include the follow(start rule); I.E., EOF + // + if (i>1) + { + followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE); + } + } + else + { + break; // Cannot see End Of Rule from here, just drop out + } + } + localFollowSet->free(localFollowSet); + localFollowSet = NULL; + } + } + + if (localFollowSet != NULL) + { + localFollowSet->free(localFollowSet); + } + return followSet; +} + +/// Standard/Example error display method. +/// No generic error message display funciton coudl possibly do everything correctly +/// for all possible parsers. Hence you are provided with this example routine, which +/// you should override in your parser/tree parser to do as you will. +/// +/// Here we depart somewhat from the Java runtime as that has now split up a lot +/// of the error display routines into spearate units. However, ther is little advantage +/// to this in the C version as you will probably implement all such routines as a +/// separate translation unit, rather than install them all as pointers to functions +/// in the base recognizer. 
+/// +static void +displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames) +{ + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + pANTLR3_STRING ttext; + pANTLR3_STRING ftext; + pANTLR3_EXCEPTION ex; + pANTLR3_COMMON_TOKEN theToken; + pANTLR3_BASE_TREE theBaseTree; + pANTLR3_COMMON_TREE theCommonTree; + + // Retrieve some info for easy reading. + // + ex = recognizer->state->exception; + ttext = NULL; + + // See if there is a 'filename' we can use + // + if (ex->streamName == NULL) + { + if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF) + { + ANTLR3_FPRINTF(stderr, "-end of input-("); + } + else + { + ANTLR3_FPRINTF(stderr, "-unknown source-("); + } + } + else + { + ftext = ex->streamName->to8(ex->streamName); + ANTLR3_FPRINTF(stderr, "%s(", ftext->chars); + } + + // Next comes the line number + // + + ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line); + ANTLR3_FPRINTF(stderr, " : error %d : %s", + recognizer->state->exception->type, + (pANTLR3_UINT8) (recognizer->state->exception->message)); + + + // How we determine the next piece is dependent on which thing raised the + // error. + // + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + // Prepare the knowledge we know we have + // + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token); + ttext = theToken->toString(theToken); + + ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine); + if (theToken != NULL) + { + if (theToken->type == ANTLR3_TOKEN_EOF) + { + ANTLR3_FPRINTF(stderr, ", at <EOF>"); + } + else + { + // Guard against null text in a token + // + ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? 
(pANTLR3_UINT8)"<no text for the token>" : ttext->chars); + } + } + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token); + ttext = theBaseTree->toStringTree(theBaseTree); + + if (theBaseTree != NULL) + { + theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super; + + if (theCommonTree != NULL) + { + theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree); + } + ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree)); + ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars); + } + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n"); + return; + break; + } + + // Although this function should generally be provided by the implementation, this one + // should be as helpful as possible for grammar developers and serve as an example + // of what you can do with each exception type. In general, when you make up your + // 'real' handler, you should debug the routine with all possible errors you expect + // which will then let you be as specific as possible about all circumstances. + // + // Note that in the general case, errors thrown by tree parsers indicate a problem + // with the output of the parser or with the tree grammar itself. The job of the parser + // is to produce a perfect (in traversal terms) syntactically correct tree, so errors + // at that stage should really be semantic errors that your own code determines and handles + // in whatever way is appropriate. + // + switch (ex->type) + { + case ANTLR3_UNWANTED_TOKEN_EXCEPTION: + + // Indicates that the recognizer was fed a token which seesm to be + // spurious input. 
We can detect this when the token that follows + // this unwanted token would normally be part of the syntactically + // correct stream. Then we can see that the token we are looking at + // is just something that should not be there and throw this exception. + // + if (tokenNames == NULL) + { + ANTLR3_FPRINTF(stderr, " : Extraneous input..."); + } + else + { + if (ex->expecting == ANTLR3_TOKEN_EOF) + { + ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n"); + } + else + { + ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]); + } + } + break; + + case ANTLR3_MISSING_TOKEN_EXCEPTION: + + // Indicates that the recognizer detected that the token we just + // hit would be valid syntactically if preceeded by a particular + // token. Perhaps a missing ';' at line end or a missing ',' in an + // expression list, and such like. + // + if (tokenNames == NULL) + { + ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting); + } + else + { + if (ex->expecting == ANTLR3_TOKEN_EOF) + { + ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n"); + } + else + { + ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]); + } + } + break; + + case ANTLR3_RECOGNITION_EXCEPTION: + + // Indicates that the recognizer received a token + // in the input that was not predicted. This is the basic exception type + // from which all others are derived. So we assume it was a syntax error. + // You may get this if there are not more tokens and more are needed + // to complete a parse for instance. + // + ANTLR3_FPRINTF(stderr, " : syntax error...\n"); + break; + + case ANTLR3_MISMATCHED_TOKEN_EXCEPTION: + + // We were expecting to see one thing and got another. This is the + // most common error if we coudl not detect a missing or unwanted token. + // Here you can spend your efforts to + // derive more useful error messages based on the expected + // token set and the last token and so on. 
The error following + // bitmaps do a good job of reducing the set that we were looking + // for down to something small. Knowing what you are parsing may be + // able to allow you to be even more specific about an error. + // + if (tokenNames == NULL) + { + ANTLR3_FPRINTF(stderr, " : syntax error...\n"); + } + else + { + if (ex->expecting == ANTLR3_TOKEN_EOF) + { + ANTLR3_FPRINTF(stderr, " : expected <EOF>\n"); + } + else + { + ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]); + } + } + break; + + case ANTLR3_NO_VIABLE_ALT_EXCEPTION: + + // We could not pick any alt decision from the input given + // so god knows what happened - however when you examine your grammar, + // you should. It means that at the point where the current token occurred + // that the DFA indicates nowhere to go from here. + // + ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n"); + + break; + + case ANTLR3_MISMATCHED_SET_EXCEPTION: + + { + ANTLR3_UINT32 count; + ANTLR3_UINT32 bit; + ANTLR3_UINT32 size; + ANTLR3_UINT32 numbits; + pANTLR3_BITSET errBits; + + // This means we were able to deal with one of a set of + // possible tokens at this point, but we did not see any + // member of that set. + // + ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : "); + + // What tokens could we have accepted at this point in the + // parse? + // + count = 0; + errBits = antlr3BitsetLoad (ex->expectingSet); + numbits = errBits->numBits (errBits); + size = errBits->size (errBits); + + if (size > 0) + { + // However many tokens we could have dealt with here, it is usually + // not useful to print ALL of the set here. I arbitrarily chose 8 + // here, but you should do whatever makes sense for you of course. + // No token number 0, so look for bit 1 and on. + // + for (bit = 1; bit < numbits && count < 8 && count < size; bit++) + { + // TODO: This doesn;t look right - should be asking if the bit is set!! 
+ // + if (tokenNames[bit]) + { + ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]); + count++; + } + } + ANTLR3_FPRINTF(stderr, "\n"); + } + else + { + ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n"); + ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n"); + } + } + break; + + case ANTLR3_EARLY_EXIT_EXCEPTION: + + // We entered a loop requiring a number of token sequences + // but found a token that ended that sequence earlier than + // we should have done. + // + ANTLR3_FPRINTF(stderr, " : missing elements...\n"); + break; + + default: + + // We don't handle any other exceptions here, but you can + // if you wish. If we get an exception that hits this point + // then we are just going to report what we know about the + // token. + // + ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n"); + break; + } + + // Here you have the token that was in error which if this is + // the standard implementation will tell you the line and offset + // and also record the address of the start of the line in the + // input stream. You could therefore print the source line and so on. + // Generally though, I would expect that your lexer/parser will keep + // its own map of lines and source pointers or whatever as there + // are a lot of specific things you need to know about the input + // to do something like that. + // Here is where you do it though :-). + // +} + +/// Return how many syntax errors were detected by this recognizer +/// +static ANTLR3_UINT32 +getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer) +{ + return recognizer->state->errorCount; +} + +/// Recover from an error found on the input stream. Mostly this is +/// NoViableAlt exceptions, but could be a mismatched token that +/// the match() routine could not recover from. 
+/// +static void +recover (pANTLR3_BASE_RECOGNIZER recognizer) +{ + // Used to compute the follow set of tokens + // + pANTLR3_BITSET followSet; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n"); + return; + + break; + } + + // Are we about to repeat the same error? + // + if (recognizer->state->lastErrorIndex == is->index(is)) + { + // The last error was at the same token index point. This must be a case + // where LT(1) is in the recovery token set so nothing is + // consumed. Consume a single token so at least to prevent + // an infinite loop; this is a failsafe. + // + is->consume(is); + } + + // Record error index position + // + recognizer->state->lastErrorIndex = is->index(is); + + // Work out the follows set for error recovery + // + followSet = recognizer->computeErrorRecoverySet(recognizer); + + // Call resync hook (for debuggers and so on) + // + recognizer->beginResync(recognizer); + + // Consume tokens until we have resynced to something in the follows set + // + recognizer->consumeUntilSet(recognizer, followSet); + + // End resync hook + // + recognizer->endResync(recognizer); + + // Destroy the temporary bitset we produced. + // + followSet->free(followSet); + + // Reset the inError flag so we don't re-report the exception + // + recognizer->state->error = ANTLR3_FALSE; + recognizer->state->failed = ANTLR3_FALSE; +} + + +/// Attempt to recover from a single missing or extra token. 
+/// +/// EXTRA TOKEN +/// +/// LA(1) is not what we are looking for. If LA(2) has the right token, +/// however, then assume LA(1) is some extra spurious token. Delete it +/// and LA(2) as if we were doing a normal match(), which advances the +/// input. +/// +/// MISSING TOKEN +/// +/// If current token is consistent with what could come after +/// ttype then it is ok to "insert" the missing token, else throw +/// exception For example, Input "i=(3;" is clearly missing the +/// ')'. When the parser returns from the nested call to expr, it +/// will have call chain: +/// +/// stat -> expr -> atom +/// +/// and it will be trying to match the ')' at this point in the +/// derivation: +/// +/// => ID '=' '(' INT ')' ('+' atom)* ';' +/// ^ +/// match() will see that ';' doesn't match ')' and report a +/// mismatched token error. To recover, it sees that LA(1)==';' +/// is in the set of tokens that can follow the ')' token +/// reference in rule atom. It can assume that you forgot the ')'. +/// +/// The exception that was passed in, in the java implementation is +/// sorted in the recognizer exception stack in the C version. To 'throw' it we set the +/// error flag and rules cascade back when this is set. 
+/// +static void * +recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) +{ + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + void * matchedSymbol; + + + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n"); + return NULL; + + break; + } + + // Create an exception if we need one + // + if (recognizer->state->exception == NULL) + { + antlr3RecognitionExceptionNew(recognizer); + } + + // If the next token after the one we are looking at in the input stream + // is what we are looking for then we remove the one we have discovered + // from the stream by consuming it, then consume this next one along too as + // if nothing had happened. + // + if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE) + { + recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION; + recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME; + + // Call resync hook (for debuggers and so on) + // + if (recognizer->debugger != NULL) + { + recognizer->debugger->beginResync(recognizer->debugger); + } + + // "delete" the extra token + // + recognizer->beginResync(recognizer); + is->consume(is); + recognizer->endResync(recognizer); + // End resync hook + // + if (recognizer->debugger != NULL) + { + recognizer->debugger->endResync(recognizer->debugger); + } + + // Print out the error after we consume so that ANTLRWorks sees the + // token in the exception. 
+ // + recognizer->reportError(recognizer); + + // Return the token we are actually matching + // + matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is); + + // Consume the token that the rule actually expected to get as if everything + // was hunky dory. + // + is->consume(is); + + recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more + + return matchedSymbol; + } + + // Single token deletion (Unwanted above) did not work + // so we see if we can insert a token instead by calculating which + // token would be missing + // + if (mismatchIsMissingToken(recognizer, is, follow)) + { + // We can fake the missing token and proceed + // + matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow); + recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; + recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME; + recognizer->state->exception->token = matchedSymbol; + recognizer->state->exception->expecting = ttype; + + // Print out the error after we insert so that ANTLRWorks sees the + // token in the exception. + // + recognizer->reportError(recognizer); + + recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more + + return matchedSymbol; + } + + + // Neither deleting nor inserting tokens allows recovery + // must just report the exception. 
+ // + recognizer->state->error = ANTLR3_TRUE; + return NULL; +} + +static void * +recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow) +{ + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + pANTLR3_COMMON_TOKEN matchedSymbol; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n"); + return NULL; + + break; + } + + if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE) + { + // We can fake the missing token and proceed + // + matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow); + recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION; + recognizer->state->exception->token = matchedSymbol; + + // Print out the error after we insert so that ANTLRWorks sees the + // token in the exception. + // + recognizer->reportError(recognizer); + + recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more + + return matchedSymbol; + } + + // TODO - Single token deletion like in recoverFromMismatchedToken() + // + recognizer->state->error = ANTLR3_TRUE; + recognizer->state->failed = ANTLR3_TRUE; + return NULL; +} + +/// This code is factored out from mismatched token and mismatched set +/// recovery. It handles "single token insertion" error recovery for +/// both. No tokens are consumed to recover from insertions. Return +/// true if recovery was possible else return false. 
+/// +static ANTLR3_BOOLEAN +recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits) +{ + pANTLR3_BITSET viableToksFollowingRule; + pANTLR3_BITSET follow; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n"); + return ANTLR3_FALSE; + + break; + } + + follow = antlr3BitsetLoad(followBits); + + if (follow == NULL) + { + /* The follow set is NULL, which means we don't know what can come + * next, so we "hit and hope" by just signifying that we cannot + * recover, which will just cause the next token to be consumed, + * which might dig us out. + */ + return ANTLR3_FALSE; + } + + /* We have a bitmap for the follow set, hence we can compute + * what can follow this grammar element reference. + */ + if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE) + { + /* First we need to know which of the available tokens are viable + * to follow this reference. + */ + viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer); + + /* Remove the EOR token, which we do not wish to compute with + */ + follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE); + viableToksFollowingRule->free(viableToksFollowingRule); + /* We now have the computed set of what can follow the current token + */ + } + + /* We can now see if the current token works with the set of tokens + * that could follow the current grammar reference. 
If it looks like it + * is consistent, then we can "insert" that token by not throwing + * an exception and assuming that we saw it. + */ + if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE) + { + /* report the error, but don't cause any rules to abort and stuff + */ + recognizer->reportError(recognizer); + if (follow != NULL) + { + follow->free(follow); + } + recognizer->state->error = ANTLR3_FALSE; + recognizer->state->failed = ANTLR3_FALSE; + return ANTLR3_TRUE; /* Success in recovery */ + } + + if (follow != NULL) + { + follow->free(follow); + } + + /* We could not find anything viable to do, so this is going to + * cause an exception. + */ + return ANTLR3_FALSE; +} + +/// Eat tokens from the input stream until we get one of JUST the right type +/// +static void +consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType) +{ + ANTLR3_UINT32 ttype; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n"); + return; + + break; + } + + // What do have at the moment? + // + ttype = is->_LA(is, 1); + + // Start eating tokens until we get to the one we want. + // + while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType) + { + is->consume(is); + ttype = is->_LA(is, 1); + } +} + +/// Eat tokens from the input stream until we find one that +/// belongs to the supplied set. 
+/// +static void +consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set) +{ + ANTLR3_UINT32 ttype; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n"); + return; + + break; + } + + // What do have at the moment? + // + ttype = is->_LA(is, 1); + + // Start eating tokens until we get to one we want. + // + while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE) + { + is->consume(is); + ttype = is->_LA(is, 1); + } +} + +/** Return the rule invocation stack (how we got here in the parse. + * In the java version Ter just asks the JVM for all the information + * but in C we don't get this information, so I am going to do nothing + * right now. + */ +static pANTLR3_STACK +getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer) +{ + return NULL; +} + +static pANTLR3_STACK +getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name) +{ + return NULL; +} + +/** Convenience method for template rewrites - NYI. + */ +static pANTLR3_HASH_TABLE +toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens) +{ + return NULL; +} + +static void ANTLR3_CDECL +freeIntTrie (void * trie) +{ + ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie); +} + + +/** Pointer to a function to return whether the rule has parsed input starting at the supplied + * start index before. If the rule has not parsed input starting from the supplied start index, + * then it will return ANTLR3_MEMO_RULE_UNKNOWN. 
If it has parsed from the suppled start point + * then it will return the point where it last stopped parsing after that start point. + * + * \remark + * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance + * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only + * version of the table. + */ +static ANTLR3_MARKER +getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart) +{ + /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. + */ + pANTLR3_INT_TRIE ruleList; + ANTLR3_MARKER stopIndex; + pANTLR3_TRIE_ENTRY entry; + + /* See if we have a list in the ruleMemos for this rule, and if not, then create one + * as we will need it eventually if we are being asked for the memo here. + */ + entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex); + + if (entry == NULL) + { + /* Did not find it, so create a new one for it, with a bit depth based on the + * size of the input stream. We need the bit depth to incorporate the number if + * bits required to represent the largest possible stop index in the input, which is the + * last character. 
An int stream is free to return the largest 64 bit offset if it has + * no idea of the size, but you should remember that this will cause the leftmost + * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-) + */ + ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */ + + if (ruleList != NULL) + { + recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie); + } + + /* We cannot have a stopIndex in a trie we have just created of course + */ + return MEMO_RULE_UNKNOWN; + } + + ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr); + + /* See if there is a stop index associated with the supplied start index. + */ + stopIndex = 0; + + entry = ruleList->get(ruleList, ruleParseStart); + if (entry != NULL) + { + stopIndex = (ANTLR3_MARKER)(entry->data.intVal); + } + + if (stopIndex == 0) + { + return MEMO_RULE_UNKNOWN; + } + + return stopIndex; +} + +/** Has this rule already parsed input at the current index in the + * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE + * if we have not. + * + * This method has a side-effect: if we have seen this input for + * this rule and successfully parsed before, then seek ahead to + * 1 past the stop token matched for this rule last time. 
+ */ +static ANTLR3_BOOLEAN +alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex) +{ + ANTLR3_MARKER stopIndex; + pANTLR3_LEXER lexer; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + lexer = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + lexer = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + case ANTLR3_TYPE_LEXER: + + lexer = (pANTLR3_LEXER) (recognizer->super); + parser = NULL; + tparser = NULL; + is = lexer->input->istream; + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n"); + return ANTLR3_FALSE; + + break; + } + + /* See if we have a memo marker for this. + */ + stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is)); + + if (stopIndex == MEMO_RULE_UNKNOWN) + { + return ANTLR3_FALSE; + } + + if (stopIndex == MEMO_RULE_FAILED) + { + recognizer->state->failed = ANTLR3_TRUE; + } + else + { + is->seek(is, stopIndex+1); + } + + /* If here then the rule was executed for this input already + */ + return ANTLR3_TRUE; +} + +/** Record whether or not this rule parsed the input at this position + * successfully. + */ +static void +memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart) +{ + /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST. 
+ */ + pANTLR3_INT_TRIE ruleList; + pANTLR3_TRIE_ENTRY entry; + ANTLR3_MARKER stopIndex; + pANTLR3_LEXER lexer; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + case ANTLR3_TYPE_LEXER: + + lexer = (pANTLR3_LEXER) (recognizer->super); + parser = NULL; + tparser = NULL; + is = lexer->input->istream; + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n"); + return; + + break; + } + + stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1; + + entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex); + + if (entry != NULL) + { + ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr); + + /* If we don't already have this entry, append it. The memoize trie does not + * accept duplicates so it won't add it if already there and we just ignore the + * return code as we don't care if it is there already. + */ + ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL); + } +} +/** A syntactic predicate. Returns true/false depending on whether + * the specified grammar fragment matches the current input stream. + * This resets the failed instance var afterwards. 
+ */ +static ANTLR3_BOOLEAN +synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx)) +{ + ANTLR3_MARKER start; + pANTLR3_PARSER parser; + pANTLR3_TREE_PARSER tparser; + pANTLR3_INT_STREAM is; + + switch (recognizer->type) + { + case ANTLR3_TYPE_PARSER: + + parser = (pANTLR3_PARSER) (recognizer->super); + tparser = NULL; + is = parser->tstream->istream; + + break; + + case ANTLR3_TYPE_TREE_PARSER: + + tparser = (pANTLR3_TREE_PARSER) (recognizer->super); + parser = NULL; + is = tparser->ctnstream->tnstream->istream; + + break; + + default: + + ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n"); + return ANTLR3_FALSE; + + break; + } + + /* Begin backtracking so we can get back to where we started after trying out + * the syntactic predicate. + */ + start = is->mark(is); + recognizer->state->backtracking++; + + /* Try the syntactical predicate + */ + predicate(ctx); + + /* Reset + */ + is->rewind(is, start); + recognizer->state->backtracking--; + + if (recognizer->state->failed == ANTLR3_TRUE) + { + /* Predicate failed + */ + recognizer->state->failed = ANTLR3_FALSE; + return ANTLR3_FALSE; + } + else + { + /* Predicate was successful + */ + recognizer->state->failed = ANTLR3_FALSE; + return ANTLR3_TRUE; + } +} + +static void +reset(pANTLR3_BASE_RECOGNIZER recognizer) +{ + if (recognizer->state->following != NULL) + { + recognizer->state->following->free(recognizer->state->following); + } + + // Reset the state flags + // + recognizer->state->errorRecovery = ANTLR3_FALSE; + recognizer->state->lastErrorIndex = -1; + recognizer->state->failed = ANTLR3_FALSE; + recognizer->state->errorCount = 0; + recognizer->state->backtracking = 0; + recognizer->state->following = NULL; + + if (recognizer->state != NULL) + { + if (recognizer->state->ruleMemo != NULL) + { + recognizer->state->ruleMemo->free(recognizer->state->ruleMemo); + recognizer->state->ruleMemo = 
antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */ + } + } + + + // Install a new following set + // + recognizer->state->following = antlr3StackNew(8); + +} + +// Default implementation is for parser and assumes a token stream as supplied by the runtime. +// You MAY need override this function if the standard TOKEN_STREAM is not what you are using. +// +static void * +getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) +{ + return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1); +} + +// Default implementation is for parser and assumes a token stream as supplied by the runtime. +// You MAY need override this function if the standard COMMON_TOKEN_STREAM is not what you are using. +// +static void * +getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, + ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) +{ + pANTLR3_TOKEN_STREAM ts; + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_COMMON_TOKEN token; + pANTLR3_COMMON_TOKEN current; + pANTLR3_STRING text; + + // Dereference the standard pointers + // + ts = (pANTLR3_TOKEN_STREAM)istream->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super; + + // Work out what to use as the current symbol to make a line and offset etc + // If we are at EOF, we use the token before EOF + // + current = ts->_LT(ts, 1); + if (current->getType(current) == ANTLR3_TOKEN_EOF) + { + current = ts->_LT(ts, -1); + } + + // Create a new empty token + // + if (recognizer->state->tokFactory == NULL) + { + // We don't yet have a token factory for making tokens + // we just need a fake one using the input stream of the current + // token. 
+ // + recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input); + } + token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory); + + // Set some of the token properties based on the current token + // + token->setLine (token, current->getLine(current)); + token->setCharPositionInLine (token, current->getCharPositionInLine(current)); + token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL); + token->setType (token, expectedTokenType); + token->user1 = current->user1; + token->user2 = current->user2; + token->user3 = current->user3; + token->custom = current->custom; + token->lineStart = current->lineStart; + + // Create the token text that shows it has been inserted + // + token->setText8(token, (pANTLR3_UINT8)"<missing "); + text = token->getText(token); + + if (text != NULL) + { + text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]); + text->append8(text, (const char *)">"); + } + + // Finally return the pointer to our new token + // + return token; +} + + +#ifdef ANTLR3_WINDOWS +#pragma warning( default : 4100 ) +#endif + +/// @} +/// + diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3basetree.c b/impl/antlr/libantlr3c-3.4/src/antlr3basetree.c new file mode 100644 index 0000000..bbc81e7 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3basetree.c @@ -0,0 +1,489 @@ +#include <antlr3basetree.h> + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +static void * getChild (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i); +static ANTLR3_UINT32 getChildCount (pANTLR3_BASE_TREE tree); +static ANTLR3_UINT32 getCharPositionInLine +(pANTLR3_BASE_TREE tree); +static ANTLR3_UINT32 getLine (pANTLR3_BASE_TREE tree); +static pANTLR3_BASE_TREE +getFirstChildWithType +(pANTLR3_BASE_TREE tree, ANTLR3_UINT32 type); +static void addChild (pANTLR3_BASE_TREE tree, pANTLR3_BASE_TREE child); +static void addChildren (pANTLR3_BASE_TREE tree, pANTLR3_LIST kids); +static void replaceChildren (pANTLR3_BASE_TREE parent, ANTLR3_INT32 startChildIndex, ANTLR3_INT32 stopChildIndex, pANTLR3_BASE_TREE t); + +static void freshenPACIndexesAll(pANTLR3_BASE_TREE tree); +static void freshenPACIndexes (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 offset); + +static void setChild (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i, void * child); +static void * deleteChild (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i); +static void * dupTree (pANTLR3_BASE_TREE tree); +static pANTLR3_STRING toStringTree (pANTLR3_BASE_TREE tree); + + +ANTLR3_API pANTLR3_BASE_TREE +antlr3BaseTreeNew(pANTLR3_BASE_TREE tree) +{ + /* api */ + tree->getChild = getChild; + tree->getChildCount = getChildCount; + tree->addChild = (void (*)(pANTLR3_BASE_TREE, void *))(addChild); + tree->addChildren = addChildren; + tree->setChild = setChild; + tree->deleteChild = deleteChild; + tree->dupTree = dupTree; + tree->toStringTree = toStringTree; + tree->getCharPositionInLine = getCharPositionInLine; + tree->getLine = getLine; + tree->replaceChildren = replaceChildren; + tree->freshenPACIndexesAll = freshenPACIndexesAll; + tree->freshenPACIndexes = freshenPACIndexes; + tree->getFirstChildWithType = (void *(*)(pANTLR3_BASE_TREE, ANTLR3_UINT32))(getFirstChildWithType); + tree->children = NULL; + tree->strFactory = NULL; + + /* Rest must be filled in by caller. 
+ */ + return tree; +} + +static ANTLR3_UINT32 +getCharPositionInLine (pANTLR3_BASE_TREE tree) +{ + return 0; +} + +static ANTLR3_UINT32 +getLine (pANTLR3_BASE_TREE tree) +{ + return 0; +} +static pANTLR3_BASE_TREE +getFirstChildWithType (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 type) +{ + ANTLR3_UINT32 i; + ANTLR3_UINT32 cs; + + pANTLR3_BASE_TREE t; + if (tree->children != NULL) + { + cs = tree->children->size(tree->children); + for (i = 0; i < cs; i++) + { + t = (pANTLR3_BASE_TREE) (tree->children->get(tree->children, i)); + if (tree->getType(t) == type) + { + return (pANTLR3_BASE_TREE)t; + } + } + } + return NULL; +} + + + +static void * +getChild (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i) +{ + if ( tree->children == NULL + || i >= tree->children->size(tree->children)) + { + return NULL; + } + return tree->children->get(tree->children, i); +} + + +static ANTLR3_UINT32 +getChildCount (pANTLR3_BASE_TREE tree) +{ + if (tree->children == NULL) + { + return 0; + } + else + { + return tree->children->size(tree->children); + } +} + +void +addChild (pANTLR3_BASE_TREE tree, pANTLR3_BASE_TREE child) +{ + ANTLR3_UINT32 n; + ANTLR3_UINT32 i; + + if (child == NULL) + { + return; + } + + if (child->isNilNode(child) == ANTLR3_TRUE) + { + if (child->children != NULL && child->children == tree->children) + { + // TODO: Change to exception rather than ANTLR3_FPRINTF? + // + ANTLR3_FPRINTF(stderr, "ANTLR3: An attempt was made to add a child list to itself!\n"); + return; + } + + // Add all of the children's children to this list + // + if (child->children != NULL) + { + if (tree->children == NULL) + { + // We are build ing the tree structure here, so we need not + // worry about duplication of pointers as the tree node + // factory will only clean up each node once. So we just + // copy in the child's children pointer as the child is + // a nil node (has not root itself). 
+ // + tree->children = child->children; + child->children = NULL; + freshenPACIndexesAll(tree); + + } + else + { + // Need to copy the children + // + n = child->children->size(child->children); + + for (i = 0; i < n; i++) + { + pANTLR3_BASE_TREE entry; + entry = child->children->get(child->children, i); + + // ANTLR3 lists can be sparse, unlike Array Lists + // + if (entry != NULL) + { + tree->children->add(tree->children, entry, (void (ANTLR3_CDECL *) (void *))child->free); + } + } + } + } + } + else + { + // Tree we are adding is not a Nil and might have children to copy + // + if (tree->children == NULL) + { + // No children in the tree we are adding to, so create a new list on + // the fly to hold them. + // + tree->createChildrenList(tree); + } + + tree->children->add(tree->children, child, (void (ANTLR3_CDECL *)(void *))child->free); + + } +} + +/// Add all elements of the supplied list as children of this node +/// +static void +addChildren (pANTLR3_BASE_TREE tree, pANTLR3_LIST kids) +{ + ANTLR3_UINT32 i; + ANTLR3_UINT32 s; + + s = kids->size(kids); + for (i = 0; i<s; i++) + { + tree->addChild(tree, (pANTLR3_BASE_TREE)(kids->get(kids, i+1))); + } +} + + +static void +setChild (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i, void * child) +{ + if (tree->children == NULL) + { + tree->createChildrenList(tree); + } + tree->children->set(tree->children, i, child, NULL, ANTLR3_FALSE); +} + +static void * +deleteChild (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i) +{ + if ( tree->children == NULL) + { + return NULL; + } + + return tree->children->remove(tree->children, i); +} + +static void * +dupTree (pANTLR3_BASE_TREE tree) +{ + pANTLR3_BASE_TREE newTree; + ANTLR3_UINT32 i; + ANTLR3_UINT32 s; + + newTree = tree->dupNode (tree); + + if (tree->children != NULL) + { + s = tree->children->size (tree->children); + + for (i = 0; i < s; i++) + { + pANTLR3_BASE_TREE t; + pANTLR3_BASE_TREE newNode; + + t = (pANTLR3_BASE_TREE) tree->children->get(tree->children, i); + + if (t!= 
NULL) + { + newNode = t->dupTree(t); + newTree->addChild(newTree, newNode); + } + } + } + + return newTree; +} + +static pANTLR3_STRING +toStringTree (pANTLR3_BASE_TREE tree) +{ + pANTLR3_STRING string; + ANTLR3_UINT32 i; + ANTLR3_UINT32 n; + pANTLR3_BASE_TREE t; + + if (tree->children == NULL || tree->children->size(tree->children) == 0) + { + return tree->toString(tree); + } + + /* Need a new string with nothing at all in it. + */ + string = tree->strFactory->newRaw(tree->strFactory); + + if (tree->isNilNode(tree) == ANTLR3_FALSE) + { + string->append8 (string, "("); + string->appendS (string, tree->toString(tree)); + string->append8 (string, " "); + } + if (tree->children != NULL) + { + n = tree->children->size(tree->children); + + for (i = 0; i < n; i++) + { + t = (pANTLR3_BASE_TREE) tree->children->get(tree->children, i); + + if (i > 0) + { + string->append8(string, " "); + } + string->appendS(string, t->toStringTree(t)); + } + } + if (tree->isNilNode(tree) == ANTLR3_FALSE) + { + string->append8(string,")"); + } + + return string; +} + +/// Delete children from start to stop and replace with t even if t is +/// a list (nil-root tree). Num of children can increase or decrease. +/// For huge child lists, inserting children can force walking rest of +/// children to set their child index; could be slow. 
+/// +static void +replaceChildren (pANTLR3_BASE_TREE parent, ANTLR3_INT32 startChildIndex, ANTLR3_INT32 stopChildIndex, pANTLR3_BASE_TREE newTree) +{ + ANTLR3_INT32 replacingHowMany; // How many nodes will go away + ANTLR3_INT32 replacingWithHowMany; // How many nodes will replace them + ANTLR3_INT32 numNewChildren; // Tracking variable + ANTLR3_INT32 delta; // Difference in new vs existing count + + ANTLR3_INT32 i; + ANTLR3_INT32 j; + + pANTLR3_VECTOR newChildren; // Iterator for whatever we are going to add in + ANTLR3_BOOLEAN freeNewChildren; // Whether we created the iterator locally or reused it + + if (parent->children == NULL) + { + ANTLR3_FPRINTF(stderr, "replaceChildren call: Indexes are invalid; no children in list for %s", parent->getText(parent)->chars); + return; + } + + // Either use the existing list of children in the supplied nil node, or build a vector of the + // tree we were given if it is not a nil node, then we treat both situations exactly the same + // + if (newTree->isNilNode(newTree)) + { + newChildren = newTree->children; + freeNewChildren = ANTLR3_FALSE; // We must NO free this memory + } + else + { + newChildren = antlr3VectorNew(1); + if (newChildren == NULL) + { + ANTLR3_FPRINTF(stderr, "replaceChildren: out of memory!!"); + exit(1); + } + newChildren->add(newChildren, (void *)newTree, NULL); + + freeNewChildren = ANTLR3_TRUE; // We must free this memory + } + + // Initialize + // + replacingHowMany = stopChildIndex - startChildIndex + 1; + replacingWithHowMany = newChildren->size(newChildren); + delta = replacingHowMany - replacingWithHowMany; + numNewChildren = newChildren->size(newChildren); + + // If it is the same number of nodes, then do a direct replacement + // + if (delta == 0) + { + pANTLR3_BASE_TREE child; + + // Same number of nodes + // + j = 0; + for (i = startChildIndex; i <= stopChildIndex; i++) + { + child = (pANTLR3_BASE_TREE) newChildren->get(newChildren, j); + parent->children->set(parent->children, i, child, 
NULL, ANTLR3_FALSE); + child->setParent(child, parent); + child->setChildIndex(child, i); + } + } + else if (delta > 0) + { + ANTLR3_UINT32 indexToDelete; + + // Less nodes than there were before + // reuse what we have then delete the rest + // + for (j = 0; j < numNewChildren; j++) + { + parent->children->set(parent->children, startChildIndex + j, newChildren->get(newChildren, j), NULL, ANTLR3_FALSE); + } + + // We just delete the same index position until done + // + indexToDelete = startChildIndex + numNewChildren; + + for (j = indexToDelete; j <= (ANTLR3_INT32)stopChildIndex; j++) + { + parent->children->remove(parent->children, indexToDelete); + } + + parent->freshenPACIndexes(parent, startChildIndex); + } + else + { + ANTLR3_UINT32 numToInsert; + + // More nodes than there were before + // Use what we can, then start adding + // + for (j = 0; j < replacingHowMany; j++) + { + parent->children->set(parent->children, startChildIndex + j, newChildren->get(newChildren, j), NULL, ANTLR3_FALSE); + } + + numToInsert = replacingWithHowMany - replacingHowMany; + + for (j = replacingHowMany; j < replacingWithHowMany; j++) + { + parent->children->add(parent->children, newChildren->get(newChildren, j), NULL); + } + + parent->freshenPACIndexes(parent, startChildIndex); + } + + if (freeNewChildren == ANTLR3_TRUE) + { + ANTLR3_FREE(newChildren->elements); + newChildren->elements = NULL; + newChildren->size = 0; + ANTLR3_FREE(newChildren); // Will not free the nodes + } +} + +/// Set the parent and child indexes for all children of the +/// supplied tree. +/// +static void +freshenPACIndexesAll(pANTLR3_BASE_TREE tree) +{ + tree->freshenPACIndexes(tree, 0); +} + +/// Set the parent and child indexes for some of the children of the +/// supplied tree, starting with the child at the supplied index. 
+/// +static void +freshenPACIndexes (pANTLR3_BASE_TREE tree, ANTLR3_UINT32 offset) +{ + ANTLR3_UINT32 count; + ANTLR3_UINT32 c; + + count = tree->getChildCount(tree); // How many children do we have + + // Loop from the supplied index and set the indexes and parent + // + for (c = offset; c < count; c++) + { + pANTLR3_BASE_TREE child; + + child = tree->getChild(tree, c); + + child->setChildIndex(child, c); + child->setParent(child, tree); + } +} + diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3basetreeadaptor.c b/impl/antlr/libantlr3c-3.4/src/antlr3basetreeadaptor.c new file mode 100644 index 0000000..e35878f --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3basetreeadaptor.c @@ -0,0 +1,909 @@ +/** \file + * Contains the base functions that all tree adaptors start with. + * this implementation can then be overridden by any higher implementation. + * + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3basetreeadaptor.h> + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +/* Interface functions + */ +static pANTLR3_BASE_TREE nilNode (pANTLR3_BASE_TREE_ADAPTOR adaptor); +static pANTLR3_BASE_TREE dbgNil (pANTLR3_BASE_TREE_ADAPTOR adaptor); +static pANTLR3_BASE_TREE dupTree (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static pANTLR3_BASE_TREE dbgDupTree (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static pANTLR3_BASE_TREE dupTreeTT (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_BASE_TREE parent); +static void addChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_BASE_TREE child); +static void dbgAddChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_BASE_TREE child); +static pANTLR3_BASE_TREE becomeRoot (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE newRoot, pANTLR3_BASE_TREE oldRoot); +static pANTLR3_BASE_TREE dbgBecomeRoot (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE newRoot, pANTLR3_BASE_TREE oldRoot); +static pANTLR3_BASE_TREE rulePostProcessing (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE root); +static void addChildToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN child); +static void dbgAddChildToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN child); +static pANTLR3_BASE_TREE becomeRootToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, void * 
newRoot, pANTLR3_BASE_TREE oldRoot); +static pANTLR3_BASE_TREE dbgBecomeRootToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, void * newRoot, pANTLR3_BASE_TREE oldRoot); +static pANTLR3_BASE_TREE createTypeToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken); +static pANTLR3_BASE_TREE dbgCreateTypeToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken); +static pANTLR3_BASE_TREE createTypeTokenText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken, pANTLR3_UINT8 text); +static pANTLR3_BASE_TREE dbgCreateTypeTokenText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken, pANTLR3_UINT8 text); +static pANTLR3_BASE_TREE createTypeText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_UINT8 text); +static pANTLR3_BASE_TREE dbgCreateTypeText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_UINT8 text); +static ANTLR3_UINT32 getType (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static void setType (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 type); +static pANTLR3_STRING getText (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static void setText (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_STRING t); +static void setText8 (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_UINT8 t); +static pANTLR3_BASE_TREE getChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 i); +static ANTLR3_UINT32 getChildCount (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static ANTLR3_UINT32 getUniqueID (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static ANTLR3_BOOLEAN isNilNode (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static pANTLR3_STRING makeDot (pANTLR3_BASE_TREE_ADAPTOR adaptor, void * theTree); + +/** Given a pointer to a base tree adaptor structure (which is usually embedded in the + 
* super class the implements the tree adaptor used in the parse), initialize its + * function pointers and so on. + */ +ANTLR3_API void +antlr3BaseTreeAdaptorInit(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + // Initialize the interface + // + if (debugger == NULL) + { + adaptor->nilNode = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR)) + nilNode; + adaptor->addChild = (void (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + addChild; + adaptor->becomeRoot = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + becomeRoot; + adaptor->addChildToken = (void (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, pANTLR3_COMMON_TOKEN)) + addChildToken; + adaptor->becomeRootToken = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + becomeRootToken; + adaptor->createTypeToken = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, ANTLR3_UINT32, pANTLR3_COMMON_TOKEN)) + createTypeToken; + adaptor->createTypeTokenText = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, ANTLR3_UINT32, pANTLR3_COMMON_TOKEN, pANTLR3_UINT8)) + createTypeTokenText; + adaptor->createTypeText = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, ANTLR3_UINT32, pANTLR3_UINT8)) + createTypeText; + adaptor->dupTree = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + dupTree; + } + else + { + adaptor->nilNode = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR)) + dbgNil; + adaptor->addChild = (void (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + dbgAddChild; + adaptor->becomeRoot = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + dbgBecomeRoot; + adaptor->addChildToken = (void (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, pANTLR3_COMMON_TOKEN)) + dbgAddChildToken; + adaptor->becomeRootToken = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + dbgBecomeRootToken; + adaptor->createTypeToken = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, ANTLR3_UINT32, pANTLR3_COMMON_TOKEN)) + dbgCreateTypeToken; + adaptor->createTypeTokenText = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, ANTLR3_UINT32, pANTLR3_COMMON_TOKEN, pANTLR3_UINT8)) + 
dbgCreateTypeTokenText; + adaptor->createTypeText = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, ANTLR3_UINT32, pANTLR3_UINT8)) + dbgCreateTypeText; + adaptor->dupTree = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + dbgDupTree; + debugger->adaptor = adaptor; + } + + adaptor->dupTreeTT = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + dupTreeTT; + adaptor->rulePostProcessing = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + rulePostProcessing; + adaptor->getType = (ANTLR3_UINT32 (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + getType; + adaptor->setType = (void (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, ANTLR3_UINT32)) + setType; + adaptor->getText = (pANTLR3_STRING (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getText; + adaptor->setText8 = (void (*)(pANTLR3_BASE_TREE_ADAPTOR, pANTLR3_UINT8)) + setText8; + adaptor->setText = (void (*)(pANTLR3_BASE_TREE_ADAPTOR, pANTLR3_STRING)) + setText; + adaptor->getChild = (void * (*)(pANTLR3_BASE_TREE_ADAPTOR, void *, ANTLR3_UINT32)) + getChild; + adaptor->getChildCount = (ANTLR3_UINT32 (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + getChildCount; + adaptor->getUniqueID = (ANTLR3_UINT32 (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + getUniqueID; + adaptor->isNilNode = (ANTLR3_BOOLEAN (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + isNilNode; + + adaptor->makeDot = (pANTLR3_STRING (*)(pANTLR3_BASE_TREE_ADAPTOR, void *)) + makeDot; + + /* Remaining functions filled in by the caller. + */ + return; +} + +static void +defineDotNodes(pANTLR3_BASE_TREE_ADAPTOR adaptor, void * t, pANTLR3_STRING dotSpec ) +{ + // How many nodes are we talking about? + // + int nCount; + int i; + pANTLR3_BASE_TREE child; + char buff[64]; + pANTLR3_STRING text; + int j; + + + + + + // Count the nodes + // + nCount = adaptor->getChildCount(adaptor, t); + + if (nCount == 0) + { + // This will already have been included as a child of another node + // so there is nothing to add. 
+ // + return; + } + + // For each child of the current tree, define a node using the + // memory address of the node to name it + // + for (i = 0; i<nCount; i++) + { + + // Pick up a pointer for the child + // + child = adaptor->getChild(adaptor, t, i); + + // Name the node + // + sprintf(buff, "\tn%p[label=\"", child); + dotSpec->append8(dotSpec, buff); + text = adaptor->getText(adaptor, child); + for (j = 0; j < (ANTLR3_INT32)(text->len); j++) + { + switch(text->charAt(text, j)) + { + case '"': + + dotSpec->append8(dotSpec, "\\\""); + break; + + case '\n': + + dotSpec->append8(dotSpec, "\\n"); + break; + + case '\r': + + dotSpec->append8(dotSpec, "\\r"); + break; + + default: + + dotSpec->addc(dotSpec, text->charAt(text, j)); + break; + } + } + dotSpec->append8(dotSpec, "\"]\n"); + + // And now define the children of this child (if any) + // + defineDotNodes(adaptor, child, dotSpec); + } + + // Done + // + return; +} + +static void +defineDotEdges(pANTLR3_BASE_TREE_ADAPTOR adaptor, void * t, pANTLR3_STRING dotSpec) +{ + // How many nodes are we talking about? + // + int nCount; + int i; + + if (t == NULL) + { + // No tree, so do nothing + // + return; + } + + // Count the nodes + // + nCount = adaptor->getChildCount(adaptor, t); + + if (nCount == 0) + { + // This will already have been included as a child of another node + // so there is nothing to add. 
+ // + return; + } + + // For each child, define an edge from this parent, then process + // and children of this child in the same way + // + for (i=0; i<nCount; i++) + { + pANTLR3_BASE_TREE child; + char buff[128]; + pANTLR3_STRING text; + int j; + + // Next child + // + child = adaptor->getChild(adaptor, t, i); + + // Create the edge relation + // + sprintf(buff, "\t\tn%p -> n%p\t\t// ", t, child); + + dotSpec->append8(dotSpec, buff); + + // Document the relationship + // + text = adaptor->getText(adaptor, t); + for (j = 0; j < (ANTLR3_INT32)(text->len); j++) + { + switch(text->charAt(text, j)) + { + case '"': + + dotSpec->append8(dotSpec, "\\\""); + break; + + case '\n': + + dotSpec->append8(dotSpec, "\\n"); + break; + + case '\r': + + dotSpec->append8(dotSpec, "\\r"); + break; + + default: + + dotSpec->addc(dotSpec, text->charAt(text, j)); + break; + } + } + + dotSpec->append8(dotSpec, " -> "); + + text = adaptor->getText(adaptor, child); + for (j = 0; j < (ANTLR3_INT32)(text->len); j++) + { + switch(text->charAt(text, j)) + { + case '"': + + dotSpec->append8(dotSpec, "\\\""); + break; + + case '\n': + + dotSpec->append8(dotSpec, "\\n"); + break; + + case '\r': + + dotSpec->append8(dotSpec, "\\r"); + break; + + default: + + dotSpec->addc(dotSpec, text->charAt(text, j)); + break; + } + } + dotSpec->append8(dotSpec, "\n"); + + + // Define edges for this child + // + defineDotEdges(adaptor, child, dotSpec); + } + + // Done + // + return; +} + +/// Produce a DOT specification for graphviz +// +static pANTLR3_STRING +makeDot (pANTLR3_BASE_TREE_ADAPTOR adaptor, void * theTree) +{ + // The string we are building up + // + pANTLR3_STRING dotSpec; + char buff[64]; + pANTLR3_STRING text; + int j; + + dotSpec = adaptor->strFactory->newStr8 + + ( + adaptor->strFactory, + + // Default look and feel + // + (pANTLR3_UINT8) + "digraph {\n\n" + "\tordering=out;\n" + "\tranksep=.4;\n" + "\tbgcolor=\"lightgrey\"; node [shape=box, fixedsize=false, fontsize=12, 
fontname=\"Helvetica-bold\", fontcolor=\"blue\"\n" + "\twidth=.25, height=.25, color=\"black\", fillcolor=\"white\", style=\"filled, solid, bold\"];\n\n" + "\tedge [arrowsize=.5, color=\"black\", style=\"bold\"]\n\n" + ); + + if (theTree == NULL) + { + // No tree, so create a blank spec + // + dotSpec->append8(dotSpec, "n0[label=\"EMPTY TREE\"]\n"); + return dotSpec; + } + + sprintf(buff, "\tn%p[label=\"", theTree); + dotSpec->append8(dotSpec, buff); + text = adaptor->getText(adaptor, theTree); + for (j = 0; j < (ANTLR3_INT32)(text->len); j++) + { + switch(text->charAt(text, j)) + { + case '"': + + dotSpec->append8(dotSpec, "\\\""); + break; + + case '\n': + + dotSpec->append8(dotSpec, "\\n"); + break; + + case '\r': + + dotSpec->append8(dotSpec, "\\r"); + break; + + default: + + dotSpec->addc(dotSpec, text->charAt(text, j)); + break; + } + } + dotSpec->append8(dotSpec, "\"]\n"); + + // First produce the node defintions + // + defineDotNodes(adaptor, theTree, dotSpec); + dotSpec->append8(dotSpec, "\n"); + defineDotEdges(adaptor, theTree, dotSpec); + + // Terminate the spec + // + dotSpec->append8(dotSpec, "\n}"); + + // Result + // + return dotSpec; +} + + +/** Create and return a nil tree node (no token payload) + */ +static pANTLR3_BASE_TREE +nilNode (pANTLR3_BASE_TREE_ADAPTOR adaptor) +{ + return adaptor->create(adaptor, NULL); +} + +static pANTLR3_BASE_TREE +dbgNil (pANTLR3_BASE_TREE_ADAPTOR adaptor) +{ + pANTLR3_BASE_TREE t; + + t = adaptor->create (adaptor, NULL); + adaptor->debugger->createNode (adaptor->debugger, t); + + return t; +} + +/** Return a duplicate of the entire tree (implementation provided by the + * BASE_TREE interface.) 
+ */ +static pANTLR3_BASE_TREE +dupTree (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return adaptor->dupTreeTT(adaptor, t, NULL); +} + +pANTLR3_BASE_TREE +dupTreeTT (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_BASE_TREE parent) +{ + pANTLR3_BASE_TREE newTree; + pANTLR3_BASE_TREE child; + pANTLR3_BASE_TREE newSubTree; + ANTLR3_UINT32 n; + ANTLR3_UINT32 i; + + if (t == NULL) + { + return NULL; + } + newTree = t->dupNode(t); + + // Ensure new subtree root has parent/child index set + // + adaptor->setChildIndex (adaptor, newTree, t->getChildIndex(t)); + adaptor->setParent (adaptor, newTree, parent); + n = adaptor->getChildCount (adaptor, t); + + for (i=0; i < n; i++) + { + child = adaptor->getChild (adaptor, t, i); + newSubTree = adaptor->dupTreeTT (adaptor, child, t); + adaptor->addChild (adaptor, newTree, newSubTree); + } + return newTree; +} + +/// Sends the required debugging events for duplicating a tree +/// to the debugger. +/// +static void +simulateTreeConstruction(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE tree) +{ + ANTLR3_UINT32 n; + ANTLR3_UINT32 i; + pANTLR3_BASE_TREE child; + + // Send the create node event + // + adaptor->debugger->createNode(adaptor->debugger, tree); + + n = adaptor->getChildCount(adaptor, tree); + for (i = 0; i < n; i++) + { + child = adaptor->getChild(adaptor, tree, i); + simulateTreeConstruction(adaptor, child); + adaptor->debugger->addChild(adaptor->debugger, tree, child); + } +} + +pANTLR3_BASE_TREE +dbgDupTree (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE tree) +{ + pANTLR3_BASE_TREE t; + + // Call the normal dup tree mechanism first + // + t = adaptor->dupTreeTT(adaptor, tree, NULL); + + // In order to tell the debugger what we have just done, we now + // simulate the tree building mechanism. THis will fire + // lots of debugging events to the client and look like we + // duped the tree.. 
+ // + simulateTreeConstruction(adaptor, t); + + return t; +} + +/** Add a child to the tree t. If child is a flat tree (a list), make all + * in list children of t. Warning: if t has no children, but child does + * and child isNilNode then it is ok to move children to t via + * t.children = child.children; i.e., without copying the array. This + * is for construction and I'm not sure it's completely general for + * a tree's addChild method to work this way. Make sure you differentiate + * between your tree's addChild and this parser tree construction addChild + * if it's not ok to move children to t with a simple assignment. + */ +static void +addChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_BASE_TREE child) +{ + if (t != NULL && child != NULL) + { + t->addChild(t, child); + } +} +static void +dbgAddChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_BASE_TREE child) +{ + if (t != NULL && child != NULL) + { + t->addChild(t, child); + adaptor->debugger->addChild(adaptor->debugger, t, child); + } +} +/** Use the adaptor implementation to add a child node with the supplied token + */ +static void +addChildToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN child) +{ + if (t != NULL && child != NULL) + { + adaptor->addChild(adaptor, t, adaptor->create(adaptor, child)); + } +} +static void +dbgAddChildToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN child) +{ + pANTLR3_BASE_TREE tc; + + if (t != NULL && child != NULL) + { + tc = adaptor->create(adaptor, child); + adaptor->addChild(adaptor, t, tc); + adaptor->debugger->addChild(adaptor->debugger, t, tc); + } +} + +/** If oldRoot is a nil root, just copy or move the children to newRoot. + * If not a nil root, make oldRoot a child of newRoot. 
+ * + * \code + * old=^(nil a b c), new=r yields ^(r a b c) + * old=^(a b c), new=r yields ^(r ^(a b c)) + * \endcode + * + * If newRoot is a nil-rooted single child tree, use the single + * child as the new root node. + * + * \code + * old=^(nil a b c), new=^(nil r) yields ^(r a b c) + * old=^(a b c), new=^(nil r) yields ^(r ^(a b c)) + * \endcode + * + * If oldRoot was null, it's ok, just return newRoot (even if isNilNode). + * + * \code + * old=null, new=r yields r + * old=null, new=^(nil r) yields ^(nil r) + * \endcode + * + * Return newRoot. Throw an exception if newRoot is not a + * simple node or nil root with a single child node--it must be a root + * node. If newRoot is <code>^(nil x)</endcode> return x as newRoot. + * + * Be advised that it's ok for newRoot to point at oldRoot's + * children; i.e., you don't have to copy the list. We are + * constructing these nodes so we should have this control for + * efficiency. + */ +static pANTLR3_BASE_TREE +becomeRoot (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE newRootTree, pANTLR3_BASE_TREE oldRootTree) +{ + pANTLR3_BASE_TREE saveRoot; + + /* Protect against tree rewrites if we are in some sort of error + * state, but have tried to recover. In C we can end up with a null pointer + * for a tree that was not produced. + */ + if (newRootTree == NULL) + { + return oldRootTree; + } + + /* root is just the new tree as is if there is no + * current root tree. + */ + if (oldRootTree == NULL) + { + return newRootTree; + } + + /* Produce ^(nil real-node) + */ + if (newRootTree->isNilNode(newRootTree)) + { + if (newRootTree->getChildCount(newRootTree) > 1) + { + /* TODO: Handle tree exceptions + */ + ANTLR3_FPRINTF(stderr, "More than one node as root! TODO: Create tree exception handling\n"); + return newRootTree; + } + + /* The new root is the first child, keep track of the original newRoot + * because if it was a Nil Node, then we can reuse it now. 
+ */ + saveRoot = newRootTree; + newRootTree = newRootTree->getChild(newRootTree, 0); + + // Reclaim the old nilNode() + // + saveRoot->reuse(saveRoot); + } + + /* Add old root into new root. addChild takes care of the case where oldRoot + * is a flat list (nill rooted tree). All children of oldroot are added to + * new root. + */ + newRootTree->addChild(newRootTree, oldRootTree); + + // If the oldroot tree was a nil node, then we know at this point + // it has become orphaned by the rewrite logic, so we tell it to do + // whatever it needs to do to be reused. + // + if (oldRootTree->isNilNode(oldRootTree)) + { + // We have taken an old Root Tree and appended all its children to the new + // root. In addition though it was a nil node, which means the generated code + // will not reuse it again, so we will reclaim it here. First we want to zero out + // any pointers it was carrying around. We are just the baseTree handler so we + // don't know necessarilly know how to do this for the real node, we just ask the tree itself + // to do it. + // + oldRootTree->reuse(oldRootTree); + } + /* Always returns new root structure + */ + return newRootTree; + +} +static pANTLR3_BASE_TREE +dbgBecomeRoot (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE newRootTree, pANTLR3_BASE_TREE oldRootTree) +{ + pANTLR3_BASE_TREE t; + + t = becomeRoot(adaptor, newRootTree, oldRootTree); + + adaptor->debugger->becomeRoot(adaptor->debugger, newRootTree, oldRootTree); + + return t; +} +/** Transform ^(nil x) to x + */ +static pANTLR3_BASE_TREE + rulePostProcessing (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE root) +{ + pANTLR3_BASE_TREE saveRoot; + + // Keep track of the root we are given. If it is a nilNode, then we + // can reuse it rather than orphaning it! 
+ // + saveRoot = root; + + if (root != NULL && root->isNilNode(root)) + { + if (root->getChildCount(root) == 0) + { + root = NULL; + } + else if (root->getChildCount(root) == 1) + { + root = root->getChild(root, 0); + root->setParent(root, NULL); + root->setChildIndex(root, -1); + + // The root we were given was a nil node, wiht one child, which means it has + // been abandoned and would be lost in the node factory. However + // nodes can be flagged as resuable to prevent this terrible waste + // + saveRoot->reuse(saveRoot); + } + } + + return root; +} + +/** Use the adaptor interface to set a new tree node with the supplied token + * to the root of the tree. + */ +static pANTLR3_BASE_TREE + becomeRootToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, void * newRoot, pANTLR3_BASE_TREE oldRoot) +{ + return adaptor->becomeRoot(adaptor, adaptor->create(adaptor, newRoot), oldRoot); +} +static pANTLR3_BASE_TREE +dbgBecomeRootToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, void * newRoot, pANTLR3_BASE_TREE oldRoot) +{ + pANTLR3_BASE_TREE t; + + t = adaptor->becomeRoot(adaptor, adaptor->create(adaptor, newRoot), oldRoot); + + adaptor->debugger->becomeRoot(adaptor->debugger,t, oldRoot); + + return t; +} + +/** Use the super class supplied create() method to create a new node + * from the supplied token. 
+ */ +static pANTLR3_BASE_TREE +createTypeToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken) +{ + /* Create the new token + */ + fromToken = adaptor->createTokenFromToken(adaptor, fromToken); + + /* Set the type of the new token to that supplied + */ + fromToken->setType(fromToken, tokenType); + + /* Return a new node based upon this token + */ + return adaptor->create(adaptor, fromToken); +} +static pANTLR3_BASE_TREE +dbgCreateTypeToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken) +{ + pANTLR3_BASE_TREE t; + + t = createTypeToken(adaptor, tokenType, fromToken); + + adaptor->debugger->createNode(adaptor->debugger, t); + + return t; +} + +static pANTLR3_BASE_TREE +createTypeTokenText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken, pANTLR3_UINT8 text) +{ + /* Create the new token + */ + fromToken = adaptor->createTokenFromToken(adaptor, fromToken); + + /* Set the type of the new token to that supplied + */ + fromToken->setType(fromToken, tokenType); + + /* Set the text of the token accordingly + */ + fromToken->setText8(fromToken, text); + + /* Return a new node based upon this token + */ + return adaptor->create(adaptor, fromToken); +} +static pANTLR3_BASE_TREE +dbgCreateTypeTokenText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_COMMON_TOKEN fromToken, pANTLR3_UINT8 text) +{ + pANTLR3_BASE_TREE t; + + t = createTypeTokenText(adaptor, tokenType, fromToken, text); + + adaptor->debugger->createNode(adaptor->debugger, t); + + return t; +} + +static pANTLR3_BASE_TREE + createTypeText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_UINT8 text) +{ + pANTLR3_COMMON_TOKEN fromToken; + + /* Create the new token + */ + fromToken = adaptor->createToken(adaptor, tokenType, text); + + /* Return a new node based upon this token + */ + return adaptor->create(adaptor, fromToken); +} +static 
pANTLR3_BASE_TREE + dbgCreateTypeText (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_UINT8 text) +{ + pANTLR3_BASE_TREE t; + + t = createTypeText(adaptor, tokenType, text); + + adaptor->debugger->createNode(adaptor->debugger, t); + + return t; + +} +/** Dummy implementation - will be supplied by super class + */ +static ANTLR3_UINT32 + getType (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return 0; +} + +/** Dummy implementation - will be supplied by super class + */ +static void + setType (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 type) +{ + ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement setType()\n"); +} + +/** Dummy implementation - will be supplied by super class + */ +static pANTLR3_STRING + getText (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement getText()\n"); + return NULL; +} + +/** Dummy implementation - will be supplied by super class + */ +static void + setText (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_STRING t) +{ + ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement setText()\n"); +} +/** Dummy implementation - will be supplied by super class + */ +static void +setText8 (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_UINT8 t) +{ + ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement setText()\n"); +} + +static pANTLR3_BASE_TREE + getChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i) +{ + ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement getChild()\n"); + return NULL; +} + +static ANTLR3_UINT32 + getChildCount (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE tree) +{ + 
ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement getChildCount()\n"); + return 0; +} + +/** Returns a uniqueID for the node. Because this is the C implementation + * we can just use its address suitably converted/cast to an integer. + */ +static ANTLR3_UINT32 + getUniqueID (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE node) +{ + return ANTLR3_UINT32_CAST(node); +} + +static ANTLR3_BOOLEAN +isNilNode (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return t->isNilNode(t); +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3bitset.c b/impl/antlr/libantlr3c-3.4/src/antlr3bitset.c new file mode 100644 index 0000000..4e63c79 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3bitset.c @@ -0,0 +1,681 @@ +/// +/// \file +/// Contains the C implementation of ANTLR3 bitsets as adapted from Terence Parr's +/// Java implementation. +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3bitset.h> + +// External interface +// + +static pANTLR3_BITSET antlr3BitsetClone (pANTLR3_BITSET inSet); +static pANTLR3_BITSET antlr3BitsetOR (pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2); +static void antlr3BitsetORInPlace (pANTLR3_BITSET bitset, pANTLR3_BITSET bitset2); +static ANTLR3_UINT32 antlr3BitsetSize (pANTLR3_BITSET bitset); +static void antlr3BitsetAdd (pANTLR3_BITSET bitset, ANTLR3_INT32 bit); +static ANTLR3_BOOLEAN antlr3BitsetEquals (pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2); +static ANTLR3_BOOLEAN antlr3BitsetMember (pANTLR3_BITSET bitset, ANTLR3_UINT32 bit); +static ANTLR3_UINT32 antlr3BitsetNumBits (pANTLR3_BITSET bitset); +static void antlr3BitsetRemove (pANTLR3_BITSET bitset, ANTLR3_UINT32 bit); +static ANTLR3_BOOLEAN antlr3BitsetIsNil (pANTLR3_BITSET bitset); +static pANTLR3_INT32 antlr3BitsetToIntList (pANTLR3_BITSET bitset); + +// Local functions +// +static void growToInclude (pANTLR3_BITSET bitset, ANTLR3_INT32 bit); +static void grow (pANTLR3_BITSET bitset, ANTLR3_INT32 newSize); +static ANTLR3_UINT64 bitMask (ANTLR3_UINT32 bitNumber); +static ANTLR3_UINT32 numWordsToHold (ANTLR3_UINT32 bit); +static ANTLR3_UINT32 wordNumber (ANTLR3_UINT32 bit); +static void antlr3BitsetFree 
(pANTLR3_BITSET bitset); + +static void +antlr3BitsetFree(pANTLR3_BITSET bitset) +{ + if (bitset->blist.bits != NULL) + { + ANTLR3_FREE(bitset->blist.bits); + bitset->blist.bits = NULL; + } + ANTLR3_FREE(bitset); + + return; +} + +ANTLR3_API pANTLR3_BITSET +antlr3BitsetNew(ANTLR3_UINT32 numBits) +{ + pANTLR3_BITSET bitset; + + ANTLR3_UINT32 numelements; + + // Allocate memory for the bitset structure itself + // + bitset = (pANTLR3_BITSET) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BITSET)); + + if (bitset == NULL) + { + return NULL; + } + + // Avoid memory thrashing at the up front expense of a few bytes + // + if (numBits < (8 * ANTLR3_BITSET_BITS)) + { + numBits = 8 * ANTLR3_BITSET_BITS; + } + + // No we need to allocate the memory for the number of bits asked for + // in multiples of ANTLR3_UINT64. + // + numelements = ((numBits -1) >> ANTLR3_BITSET_LOG_BITS) + 1; + + bitset->blist.bits = (pANTLR3_BITWORD) ANTLR3_MALLOC((size_t)(numelements * sizeof(ANTLR3_BITWORD))); + memset(bitset->blist.bits, 0, (size_t)(numelements * sizeof(ANTLR3_BITWORD))); + bitset->blist.length = numelements; + + if (bitset->blist.bits == NULL) + { + ANTLR3_FREE(bitset); + return NULL; + } + + antlr3BitsetSetAPI(bitset); + + + // All seems good + // + return bitset; +} + +ANTLR3_API void +antlr3BitsetSetAPI(pANTLR3_BITSET bitset) +{ + bitset->clone = antlr3BitsetClone; + bitset->bor = antlr3BitsetOR; + bitset->borInPlace = antlr3BitsetORInPlace; + bitset->size = antlr3BitsetSize; + bitset->add = antlr3BitsetAdd; + bitset->grow = grow; + bitset->equals = antlr3BitsetEquals; + bitset->isMember = antlr3BitsetMember; + bitset->numBits = antlr3BitsetNumBits; + bitset->remove = antlr3BitsetRemove; + bitset->isNilNode = antlr3BitsetIsNil; + bitset->toIntList = antlr3BitsetToIntList; + + bitset->free = antlr3BitsetFree; +} + +ANTLR3_API pANTLR3_BITSET +antlr3BitsetCopy(pANTLR3_BITSET_LIST blist) +{ + pANTLR3_BITSET bitset; + int numElements; + + // Allocate memory for the bitset structure itself + // 
+ bitset = (pANTLR3_BITSET) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BITSET)); + + if (bitset == NULL) + { + return NULL; + } + + numElements = blist->length; + + // Avoid memory thrashing at the expense of a few more bytes + // + if (numElements < 8) + { + numElements = 8; + } + + // Install the length in ANTLR3_UINT64 units + // + bitset->blist.length = numElements; + + bitset->blist.bits = (pANTLR3_BITWORD)ANTLR3_MALLOC((size_t)(numElements * sizeof(ANTLR3_BITWORD))); + + if (bitset->blist.bits == NULL) + { + ANTLR3_FREE(bitset); + return NULL; + } + + ANTLR3_MEMCPY(bitset->blist.bits, blist->bits, (ANTLR3_UINT64)(numElements * sizeof(ANTLR3_BITWORD))); + + // All seems good + // + return bitset; +} + +static pANTLR3_BITSET +antlr3BitsetClone(pANTLR3_BITSET inSet) +{ + pANTLR3_BITSET bitset; + + // Allocate memory for the bitset structure itself + // + bitset = antlr3BitsetNew(ANTLR3_BITSET_BITS * inSet->blist.length); + + if (bitset == NULL) + { + return NULL; + } + + // Install the actual bits in the source set + // + ANTLR3_MEMCPY(bitset->blist.bits, inSet->blist.bits, (ANTLR3_UINT64)(inSet->blist.length * sizeof(ANTLR3_BITWORD))); + + // All seems good + // + return bitset; +} + + +ANTLR3_API pANTLR3_BITSET +antlr3BitsetList(pANTLR3_HASH_TABLE list) +{ + pANTLR3_BITSET bitSet; + pANTLR3_HASH_ENUM en; + pANTLR3_HASH_KEY key; + ANTLR3_UINT64 bit; + + // We have no idea what exactly is in the list + // so create a default bitset and then just add stuff + // as we enumerate. + // + bitSet = antlr3BitsetNew(0); + + en = antlr3EnumNew(list); + + while (en->next(en, &key, (void **)(&bit)) == ANTLR3_SUCCESS) + { + bitSet->add(bitSet, (ANTLR3_UINT32)bit); + } + en->free(en); + + return NULL; +} + +/// +/// \brief +/// Creates a new bitset with at least one 64 bit bset of bits, but as +/// many 64 bit sets as are required. +/// +/// \param[in] bset +/// A variable number of bits to add to the set, ending in -1 (impossible bit). 
+/// +/// \returns +/// A new bit set with all of the specified bitmaps in it and the API +/// initialized. +/// +/// Call as: +/// - pANTLR3_BITSET = antlrBitsetLoad(bset, bset11, ..., -1); +/// - pANTLR3_BITSET = antlrBitsetOf(-1); Create empty bitset +/// +/// \remarks +/// Stdargs function - must supply -1 as last paremeter, which is NOT +/// added to the set. +/// +/// +ANTLR3_API pANTLR3_BITSET +antlr3BitsetLoad(pANTLR3_BITSET_LIST inBits) +{ + pANTLR3_BITSET bitset; + ANTLR3_UINT32 count; + + // Allocate memory for the bitset structure itself + // the input parameter is the bit number (0 based) + // to include in the bitset, so we need at at least + // bit + 1 bits. If any arguments indicate a + // a bit higher than the default number of bits (0 means default size) + // then Add() will take care + // of it. + // + bitset = antlr3BitsetNew(0); + + if (bitset == NULL) + { + return NULL; + } + + if (inBits != NULL) + { + // Now we can add the element bits into the set + // + count=0; + while (count < inBits->length) + { + if (bitset->blist.length <= count) + { + bitset->grow(bitset, count+1); + } + + bitset->blist.bits[count] = *((inBits->bits)+count); + count++; + } + } + + // return the new bitset + // + return bitset; +} + +/// +/// \brief +/// Creates a new bitset with at least one element, but as +/// many elements are required. +/// +/// \param[in] bit +/// A variable number of bits to add to the set, ending in -1 (impossible bit). +/// +/// \returns +/// A new bit set with all of the specified elements added into it. +/// +/// Call as: +/// - pANTLR3_BITSET = antlrBitsetOf(n, n1, n2, -1); +/// - pANTLR3_BITSET = antlrBitsetOf(-1); Create empty bitset +/// +/// \remarks +/// Stdargs function - must supply -1 as last paremeter, which is NOT +/// added to the set. +/// +/// +ANTLR3_API pANTLR3_BITSET +antlr3BitsetOf(ANTLR3_INT32 bit, ...) 
+{ + pANTLR3_BITSET bitset; + + va_list ap; + + // Allocate memory for the bitset structure itself + // the input parameter is the bit number (0 based) + // to include in the bitset, so we need at at least + // bit + 1 bits. If any arguments indicate a + // a bit higher than the default number of bits (0 menas default size) + // then Add() will take care + // of it. + // + bitset = antlr3BitsetNew(0); + + if (bitset == NULL) + { + return NULL; + } + + // Now we can add the element bits into the set + // + va_start(ap, bit); + while (bit != -1) + { + antlr3BitsetAdd(bitset, bit); + bit = va_arg(ap, ANTLR3_UINT32); + } + va_end(ap); + + // return the new bitset + // + return bitset; +} + +static pANTLR3_BITSET +antlr3BitsetOR(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2) +{ + pANTLR3_BITSET bitset; + + if (bitset1 == NULL) + { + return antlr3BitsetClone(bitset2); + } + + if (bitset2 == NULL) + { + return antlr3BitsetClone(bitset1); + } + + // Allocate memory for the newly ordered bitset structure itself. + // + bitset = antlr3BitsetClone(bitset1); + + antlr3BitsetORInPlace(bitset, bitset2); + + return bitset; + +} + +static void +antlr3BitsetAdd(pANTLR3_BITSET bitset, ANTLR3_INT32 bit) +{ + ANTLR3_UINT32 word; + + word = wordNumber(bit); + + if (word >= bitset->blist.length) + { + growToInclude(bitset, bit); + } + + bitset->blist.bits[word] |= bitMask(bit); + +} + +static void +grow(pANTLR3_BITSET bitset, ANTLR3_INT32 newSize) +{ + pANTLR3_BITWORD newBits; + + // Space for newly sized bitset - TODO: come back to this and use realloc?, it may + // be more efficient... + // + newBits = (pANTLR3_BITWORD) ANTLR3_CALLOC(1, (size_t)(newSize * sizeof(ANTLR3_BITWORD))); + if (bitset->blist.bits != NULL) + { + // Copy existing bits + // + ANTLR3_MEMCPY((void *)newBits, (const void *)bitset->blist.bits, (size_t)(bitset->blist.length * sizeof(ANTLR3_BITWORD))); + + // Out with the old bits... 
de de de derrr + // + ANTLR3_FREE(bitset->blist.bits); + } + + // In with the new bits... keerrrang. + // + bitset->blist.bits = newBits; + bitset->blist.length = newSize; +} + +static void +growToInclude(pANTLR3_BITSET bitset, ANTLR3_INT32 bit) +{ + ANTLR3_UINT32 bl; + ANTLR3_UINT32 nw; + + bl = (bitset->blist.length << 1); + nw = numWordsToHold(bit); + + if (bl > nw) + { + bitset->grow(bitset, bl); + } + else + { + bitset->grow(bitset, nw); + } +} + +static void +antlr3BitsetORInPlace(pANTLR3_BITSET bitset, pANTLR3_BITSET bitset2) +{ + ANTLR3_UINT32 minimum; + ANTLR3_UINT32 i; + + if (bitset2 == NULL) + { + return; + } + + + // First make sure that the target bitset is big enough + // for the new bits to be ored in. + // + if (bitset->blist.length < bitset2->blist.length) + { + growToInclude(bitset, (bitset2->blist.length * sizeof(ANTLR3_BITWORD))); + } + + // Or the miniimum number of bits after any resizing went on + // + if (bitset->blist.length < bitset2->blist.length) + { + minimum = bitset->blist.length; + } + else + { + minimum = bitset2->blist.length; + } + + for (i = minimum; i > 0; i--) + { + bitset->blist.bits[i-1] |= bitset2->blist.bits[i-1]; + } +} + +static ANTLR3_UINT64 +bitMask(ANTLR3_UINT32 bitNumber) +{ + return ((ANTLR3_UINT64)1) << (bitNumber & (ANTLR3_BITSET_MOD_MASK)); +} + +static ANTLR3_UINT32 +antlr3BitsetSize(pANTLR3_BITSET bitset) +{ + ANTLR3_UINT32 degree; + ANTLR3_INT32 i; + ANTLR3_INT8 bit; + + // TODO: Come back to this, it may be faster to & with 0x01 + // then shift right a copy of the 4 bits, than shift left a constant of 1. + // But then again, the optimizer might just work this out + // anyway. 
+ // + degree = 0; + for (i = bitset->blist.length - 1; i>= 0; i--) + { + if (bitset->blist.bits[i] != 0) + { + for (bit = ANTLR3_BITSET_BITS - 1; bit >= 0; bit--) + { + if ((bitset->blist.bits[i] & (((ANTLR3_BITWORD)1) << bit)) != 0) + { + degree++; + } + } + } + } + return degree; +} + +static ANTLR3_BOOLEAN +antlr3BitsetEquals(pANTLR3_BITSET bitset1, pANTLR3_BITSET bitset2) +{ + ANTLR3_INT32 minimum; + ANTLR3_INT32 i; + + if (bitset1 == NULL || bitset2 == NULL) + { + return ANTLR3_FALSE; + } + + // Work out the minimum comparison set + // + if (bitset1->blist.length < bitset2->blist.length) + { + minimum = bitset1->blist.length; + } + else + { + minimum = bitset2->blist.length; + } + + // Make sure explict in common bits are equal + // + for (i = minimum - 1; i >=0 ; i--) + { + if (bitset1->blist.bits[i] != bitset2->blist.bits[i]) + { + return ANTLR3_FALSE; + } + } + + // Now make sure the bits of the larger set are all turned + // off. + // + if (bitset1->blist.length > (ANTLR3_UINT32)minimum) + { + for (i = minimum ; (ANTLR3_UINT32)i < bitset1->blist.length; i++) + { + if (bitset1->blist.bits[i] != 0) + { + return ANTLR3_FALSE; + } + } + } + else if (bitset2->blist.length > (ANTLR3_UINT32)minimum) + { + for (i = minimum; (ANTLR3_UINT32)i < bitset2->blist.length; i++) + { + if (bitset2->blist.bits[i] != 0) + { + return ANTLR3_FALSE; + } + } + } + + return ANTLR3_TRUE; +} + +static ANTLR3_BOOLEAN +antlr3BitsetMember(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit) +{ + ANTLR3_UINT32 wordNo; + + wordNo = wordNumber(bit); + + if (wordNo >= bitset->blist.length) + { + return ANTLR3_FALSE; + } + + if ((bitset->blist.bits[wordNo] & bitMask(bit)) == 0) + { + return ANTLR3_FALSE; + } + else + { + return ANTLR3_TRUE; + } +} + +static void +antlr3BitsetRemove(pANTLR3_BITSET bitset, ANTLR3_UINT32 bit) +{ + ANTLR3_UINT32 wordNo; + + wordNo = wordNumber(bit); + + if (wordNo < bitset->blist.length) + { + bitset->blist.bits[wordNo] &= ~(bitMask(bit)); + } +} +static ANTLR3_BOOLEAN 
+antlr3BitsetIsNil(pANTLR3_BITSET bitset) +{ + ANTLR3_INT32 i; + + for (i = bitset->blist.length -1; i>= 0; i--) + { + if (bitset->blist.bits[i] != 0) + { + return ANTLR3_FALSE; + } + } + + return ANTLR3_TRUE; +} + +static ANTLR3_UINT32 +numWordsToHold(ANTLR3_UINT32 bit) +{ + return (bit >> ANTLR3_BITSET_LOG_BITS) + 1; +} + +static ANTLR3_UINT32 +wordNumber(ANTLR3_UINT32 bit) +{ + return bit >> ANTLR3_BITSET_LOG_BITS; +} + +static ANTLR3_UINT32 +antlr3BitsetNumBits(pANTLR3_BITSET bitset) +{ + return bitset->blist.length << ANTLR3_BITSET_LOG_BITS; +} + +/** Produce an integer list of all the bits that are turned on + * in this bitset. Used for error processing in the main as the bitset + * reresents a number of integer tokens which we use for follow sets + * and so on. + * + * The first entry is the number of elements following in the list. + */ +static pANTLR3_INT32 +antlr3BitsetToIntList (pANTLR3_BITSET bitset) +{ + ANTLR3_UINT32 numInts; // How many integers we will need + ANTLR3_UINT32 numBits; // How many bits are in the set + ANTLR3_UINT32 i; + ANTLR3_UINT32 index; + + pANTLR3_INT32 intList; + + numInts = bitset->size(bitset) + 1; + numBits = bitset->numBits(bitset); + + intList = (pANTLR3_INT32)ANTLR3_MALLOC(numInts * sizeof(ANTLR3_INT32)); + + if (intList == NULL) + { + return NULL; // Out of memory + } + + intList[0] = numInts; + + // Enumerate the bits that are turned on + // + for (i = 0, index = 1; i<numBits; i++) + { + if (bitset->isMember(bitset, i) == ANTLR3_TRUE) + { + intList[index++] = i; + } + } + + // Result set + // + return intList; +} + diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3collections.c b/impl/antlr/libantlr3c-3.4/src/antlr3collections.c new file mode 100644 index 0000000..d9e22e9 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3collections.c @@ -0,0 +1,2741 @@ +/// \file +/// Provides a number of useful functions that are roughly equivalent +/// to java HashTable and List for the purposes of Antlr 3 C runtime. 
+/// Also useable by the C programmer for things like symbol tables pointers +/// and so on. +/// +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3.h> + +#include "antlr3collections.h" + +// Interface functions for hash table +// + +// String based keys +// +static void antlr3HashDelete (pANTLR3_HASH_TABLE table, void * key); +static void * antlr3HashGet (pANTLR3_HASH_TABLE table, void * key); +static pANTLR3_HASH_ENTRY antlr3HashRemove (pANTLR3_HASH_TABLE table, void * key); +static ANTLR3_INT32 antlr3HashPut (pANTLR3_HASH_TABLE table, void * key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); + +// Integer based keys (Lists and so on) +// +static void antlr3HashDeleteI (pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key); +static void * antlr3HashGetI (pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key); +static pANTLR3_HASH_ENTRY antlr3HashRemoveI (pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key); +static ANTLR3_INT32 antlr3HashPutI (pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); + +static void antlr3HashFree (pANTLR3_HASH_TABLE table); +static ANTLR3_UINT32 antlr3HashSize (pANTLR3_HASH_TABLE table); + +// ----------- + +// Interface functions for enumeration +// +static int antlr3EnumNext (pANTLR3_HASH_ENUM en, pANTLR3_HASH_KEY * key, void ** data); +static void antlr3EnumFree (pANTLR3_HASH_ENUM en); + +// Interface functions for List +// +static void antlr3ListFree (pANTLR3_LIST list); +static void antlr3ListDelete(pANTLR3_LIST list, ANTLR3_INTKEY key); +static void * antlr3ListGet (pANTLR3_LIST list, ANTLR3_INTKEY key); +static ANTLR3_INT32 antlr3ListPut (pANTLR3_LIST list, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)); +static ANTLR3_INT32 antlr3ListAdd (pANTLR3_LIST list, void * element, void (ANTLR3_CDECL *freeptr)(void *)); +static void * antlr3ListRemove(pANTLR3_LIST list, ANTLR3_INTKEY key); +static ANTLR3_UINT32 antlr3ListSize (pANTLR3_LIST list); + +// Interface functions for Stack +// +static void antlr3StackFree (pANTLR3_STACK stack); +static void * antlr3StackPop (pANTLR3_STACK stack); +static void * 
antlr3StackGet (pANTLR3_STACK stack, ANTLR3_INTKEY key); +static ANTLR3_BOOLEAN antlr3StackPush (pANTLR3_STACK stack, void * element, void (ANTLR3_CDECL *freeptr)(void *)); +static ANTLR3_UINT32 antlr3StackSize (pANTLR3_STACK stack); +static void * antlr3StackPeek (pANTLR3_STACK stack); + +// Interface functions for vectors +// +static void ANTLR3_CDECL antlr3VectorFree (pANTLR3_VECTOR vector); +static void antlr3VectorDel (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry); +static void * antlr3VectorGet (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry); +static void * antrl3VectorRemove (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry); +static void antlr3VectorClear (pANTLR3_VECTOR vector); +static ANTLR3_UINT32 antlr3VectorAdd (pANTLR3_VECTOR vector, void * element, void (ANTLR3_CDECL *freeptr)(void *)); +static ANTLR3_UINT32 antlr3VectorSet (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry, void * element, void (ANTLR3_CDECL *freeptr)(void *), ANTLR3_BOOLEAN freeExisting); +static ANTLR3_UINT32 antlr3VectorSize (pANTLR3_VECTOR vector); +static ANTLR3_BOOLEAN antlr3VectorSwap (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry1, ANTLR3_UINT32 entry2); + +static void newPool (pANTLR3_VECTOR_FACTORY factory); +static void closeVectorFactory (pANTLR3_VECTOR_FACTORY factory); +static pANTLR3_VECTOR newVector (pANTLR3_VECTOR_FACTORY factory); +static void returnVector (pANTLR3_VECTOR_FACTORY factory, pANTLR3_VECTOR vector); + + +// Interface functions for int TRIE +// +static pANTLR3_TRIE_ENTRY intTrieGet (pANTLR3_INT_TRIE trie, ANTLR3_INTKEY key); +static ANTLR3_BOOLEAN intTrieDel (pANTLR3_INT_TRIE trie, ANTLR3_INTKEY key); +static ANTLR3_BOOLEAN intTrieAdd (pANTLR3_INT_TRIE trie, ANTLR3_INTKEY key, ANTLR3_UINT32 type, ANTLR3_INTKEY intType, void * data, void (ANTLR3_CDECL *freeptr)(void *)); +static void intTrieFree (pANTLR3_INT_TRIE trie); + + +// Interface functions for topological sorter +// +static void addEdge (pANTLR3_TOPO topo, ANTLR3_UINT32 edge, ANTLR3_UINT32 dependency); +static 
pANTLR3_UINT32 sortToArray (pANTLR3_TOPO topo); +static void sortVector (pANTLR3_TOPO topo, pANTLR3_VECTOR v); +static void freeTopo (pANTLR3_TOPO topo); + +// Local function to advance enumeration structure pointers +// +static void antlr3EnumNextEntry(pANTLR3_HASH_ENUM en); + +pANTLR3_HASH_TABLE +antlr3HashTableNew(ANTLR3_UINT32 sizeHint) +{ + // All we have to do is create the hashtable tracking structure + // and allocate memory for the requested number of buckets. + // + pANTLR3_HASH_TABLE table; + + ANTLR3_UINT32 bucket; // Used to traverse the buckets + + table = ANTLR3_MALLOC(sizeof(ANTLR3_HASH_TABLE)); + + // Error out if no memory left + if (table == NULL) + { + return NULL; + } + + // Allocate memory for the buckets + // + table->buckets = (pANTLR3_HASH_BUCKET) ANTLR3_MALLOC((size_t) (sizeof(ANTLR3_HASH_BUCKET) * sizeHint)); + + if (table->buckets == NULL) + { + ANTLR3_FREE((void *)table); + return NULL; + } + + // Modulo of the table, (bucket count). + // + table->modulo = sizeHint; + + table->count = 0; /* Nothing in there yet ( I hope) */ + + /* Initialize the buckets to empty + */ + for (bucket = 0; bucket < sizeHint; bucket++) + { + table->buckets[bucket].entries = NULL; + } + + /* Exclude duplicate entries by default + */ + table->allowDups = ANTLR3_FALSE; + + /* Assume that keys should by strduped before they are + * entered in the table. 
+ */ + table->doStrdup = ANTLR3_TRUE; + + /* Install the interface + */ + + table->get = antlr3HashGet; + table->put = antlr3HashPut; + table->del = antlr3HashDelete; + table->remove = antlr3HashRemove; + + table->getI = antlr3HashGetI; + table->putI = antlr3HashPutI; + table->delI = antlr3HashDeleteI; + table->removeI = antlr3HashRemoveI; + + table->size = antlr3HashSize; + table->free = antlr3HashFree; + + return table; +} + +static void +antlr3HashFree(pANTLR3_HASH_TABLE table) +{ + ANTLR3_UINT32 bucket; /* Used to traverse the buckets */ + + pANTLR3_HASH_BUCKET thisBucket; + pANTLR3_HASH_ENTRY entry; + pANTLR3_HASH_ENTRY nextEntry; + + /* Free the table, all buckets and all entries, and all the + * keys and data (if the table exists) + */ + if (table != NULL) + { + for (bucket = 0; bucket < table->modulo; bucket++) + { + thisBucket = &(table->buckets[bucket]); + + /* Allow sparse tables, though we don't create them as such at present + */ + if ( thisBucket != NULL) + { + entry = thisBucket->entries; + + /* Search all entries in the bucket and free them up + */ + while (entry != NULL) + { + /* Save next entry - we do not want to access memory in entry after we + * have freed it. + */ + nextEntry = entry->nextEntry; + + /* Free any data pointer, this only happens if the user supplied + * a pointer to a routine that knwos how to free the structure they + * added to the table. 
+ */ + if (entry->free != NULL) + { + entry->free(entry->data); + } + + /* Free the key memory - we know that we allocated this + */ + if (entry->keybase.type == ANTLR3_HASH_TYPE_STR && entry->keybase.key.sKey != NULL) + { + ANTLR3_FREE(entry->keybase.key.sKey); + } + + /* Free this entry + */ + ANTLR3_FREE(entry); + entry = nextEntry; /* Load next pointer to see if we shoud free it */ + } + /* Invalidate the current pointer + */ + thisBucket->entries = NULL; + } + } + + /* Now we can free the bucket memory + */ + ANTLR3_FREE(table->buckets); + } + + /* Now we free teh memory for the table itself + */ + ANTLR3_FREE(table); +} + +/** return the current size of the hash table + */ +static ANTLR3_UINT32 antlr3HashSize (pANTLR3_HASH_TABLE table) +{ + return table->count; +} + +/** Remove a numeric keyed entry from a hash table if it exists, + * no error if it does not exist. + */ +static pANTLR3_HASH_ENTRY antlr3HashRemoveI (pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key) +{ + ANTLR3_UINT32 hash; + pANTLR3_HASH_BUCKET bucket; + pANTLR3_HASH_ENTRY entry; + pANTLR3_HASH_ENTRY * nextPointer; + + /* First we need to know the hash of the provided key + */ + hash = (ANTLR3_UINT32)(key % (ANTLR3_INTKEY)(table->modulo)); + + /* Knowing the hash, we can find the bucket + */ + bucket = table->buckets + hash; + + /* Now, we traverse the entries in the bucket until + * we find the key or the end of the entries in the bucket. + * We track the element prior to the one we are examining + * as we need to set its next pointer to the next pointer + * of the entry we are deleting (if we find it). 
+ */ + entry = bucket->entries; /* Entry to examine */ + nextPointer = & bucket->entries; /* Where to put the next pointer of the deleted entry */ + + while (entry != NULL) + { + /* See if this is the entry we wish to delete + */ + if (entry->keybase.key.iKey == key) + { + /* It was the correct entry, so we set the next pointer + * of the previous entry to the next pointer of this + * located one, which takes it out of the chain. + */ + (*nextPointer) = entry->nextEntry; + + table->count--; + + return entry; + } + else + { + /* We found an entry but it wasn't the one that was wanted, so + * move to the next one, if any. + */ + nextPointer = & (entry->nextEntry); /* Address of the next pointer in the current entry */ + entry = entry->nextEntry; /* Address of the next element in the bucket (if any) */ + } + } + + return NULL; /* Not found */ +} + +/** Remove the element in the hash table for a particular + * key value, if it exists - no error if it does not. + */ +static pANTLR3_HASH_ENTRY +antlr3HashRemove(pANTLR3_HASH_TABLE table, void * key) +{ + ANTLR3_UINT32 hash; + pANTLR3_HASH_BUCKET bucket; + pANTLR3_HASH_ENTRY entry; + pANTLR3_HASH_ENTRY * nextPointer; + + /* First we need to know the hash of the provided key + */ + hash = antlr3Hash(key, (ANTLR3_UINT32)strlen((const char *)key)); + + /* Knowing the hash, we can find the bucket + */ + bucket = table->buckets + (hash % table->modulo); + + /* Now, we traverse the entries in the bucket until + * we find the key or the end of the entires in the bucket. + * We track the element prior to the one we are exmaining + * as we need to set its next pointer to the next pointer + * of the entry we are deleting (if we find it). 
+ */ + entry = bucket->entries; /* Entry to examine */ + nextPointer = & bucket->entries; /* Where to put the next pointer of the deleted entry */ + + while (entry != NULL) + { + /* See if this is the entry we wish to delete + */ + if (strcmp((const char *)key, (const char *)entry->keybase.key.sKey) == 0) + { + /* It was the correct entry, so we set the next pointer + * of the previous entry to the next pointer of this + * located one, which takes it out of the chain. + */ + (*nextPointer) = entry->nextEntry; + + /* Release the key - if we allocated that + */ + if (table->doStrdup == ANTLR3_TRUE) + { + ANTLR3_FREE(entry->keybase.key.sKey); + } + entry->keybase.key.sKey = NULL; + + table->count--; + + return entry; + } + else + { + /* We found an entry but it wasn't the one that was wanted, so + * move to the next one, if any. + */ + nextPointer = & (entry->nextEntry); /* Address of the next pointer in the current entry */ + entry = entry->nextEntry; /* Address of the next element in the bucket (if any) */ + } + } + + return NULL; /* Not found */ +} + +/** Takes the element with the supplied key out of the list, and deletes the data + * calling the supplied free() routine if any. + */ +static void +antlr3HashDeleteI (pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key) +{ + pANTLR3_HASH_ENTRY entry; + + entry = antlr3HashRemoveI(table, key); + + /* Now we can free the elements and the entry in order + */ + if (entry != NULL && entry->free != NULL) + { + /* Call programmer supplied function to release this entry data + */ + entry->free(entry->data); + entry->data = NULL; + } + /* Finally release the space for this entry block. + */ + ANTLR3_FREE(entry); +} + +/** Takes the element with the supplied key out of the list, and deletes the data + * calling the supplied free() routine if any. 
+ */ +static void +antlr3HashDelete (pANTLR3_HASH_TABLE table, void * key) +{ + pANTLR3_HASH_ENTRY entry; + + entry = antlr3HashRemove(table, key); + + /* Now we can free the elements and the entry in order + */ + if (entry != NULL && entry->free != NULL) + { + /* Call programmer supplied function to release this entry data + */ + entry->free(entry->data); + entry->data = NULL; + } + /* Finally release the space for this entry block. + */ + ANTLR3_FREE(entry); +} + +/** Return the element pointer in the hash table for a particular + * key value, or NULL if it don't exist (or was itself NULL). + */ +static void * +antlr3HashGetI(pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key) +{ + ANTLR3_UINT32 hash; + pANTLR3_HASH_BUCKET bucket; + pANTLR3_HASH_ENTRY entry; + + /* First we need to know the hash of the provided key + */ + hash = (ANTLR3_UINT32)(key % (ANTLR3_INTKEY)(table->modulo)); + + /* Knowing the hash, we can find the bucket + */ + bucket = table->buckets + hash; + + /* Now we can inspect the key at each entry in the bucket + * and see if we have a match. + */ + entry = bucket->entries; + + while (entry != NULL) + { + if (entry->keybase.key.iKey == key) + { + /* Match was found, return the data pointer for this entry + */ + return entry->data; + } + entry = entry->nextEntry; + } + + /* If we got here, then we did not find the key + */ + return NULL; +} + +/** Return the element pointer in the hash table for a particular + * key value, or NULL if it don't exist (or was itself NULL). + */ +static void * +antlr3HashGet(pANTLR3_HASH_TABLE table, void * key) +{ + ANTLR3_UINT32 hash; + pANTLR3_HASH_BUCKET bucket; + pANTLR3_HASH_ENTRY entry; + + + /* First we need to know the hash of the provided key + */ + hash = antlr3Hash(key, (ANTLR3_UINT32)strlen((const char *)key)); + + /* Knowing the hash, we can find the bucket + */ + bucket = table->buckets + (hash % table->modulo); + + /* Now we can inspect the key at each entry in the bucket + * and see if we have a match. 
+ */ + entry = bucket->entries; + + while (entry != NULL) + { + if (strcmp((const char *)key, (const char *)entry->keybase.key.sKey) == 0) + { + /* Match was found, return the data pointer for this entry + */ + return entry->data; + } + entry = entry->nextEntry; + } + + /* If we got here, then we did not find the key + */ + return NULL; +} + +/** Add the element pointer in to the table, based upon the + * hash of the provided key. + */ +static ANTLR3_INT32 +antlr3HashPutI(pANTLR3_HASH_TABLE table, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)) +{ + ANTLR3_UINT32 hash; + pANTLR3_HASH_BUCKET bucket; + pANTLR3_HASH_ENTRY entry; + pANTLR3_HASH_ENTRY * newPointer; + + /* First we need to know the hash of the provided key + */ + hash = (ANTLR3_UINT32)(key % (ANTLR3_INTKEY)(table->modulo)); + + /* Knowing the hash, we can find the bucket + */ + bucket = table->buckets + hash; + + /* Knowing the bucket, we can traverse the entries until we + * we find a NULL pointer or we find that this is already + * in the table and duplicates were not allowed. + */ + newPointer = &bucket->entries; + + while (*newPointer != NULL) + { + /* The value at new pointer is pointing to an existing entry. + * If duplicates are allowed then we don't care what it is, but + * must reject this add if the key is the same as the one we are + * supplied with. + */ + if (table->allowDups == ANTLR3_FALSE) + { + if ((*newPointer)->keybase.key.iKey == key) + { + return ANTLR3_ERR_HASHDUP; + } + } + + /* Point to the next entry pointer of the current entry we + * are traversing, if it is NULL we will create our new + * structure and point this to it. + */ + newPointer = &((*newPointer)->nextEntry); + } + + /* newPointer is now pointing at the pointer where we need to + * add our new entry, so let's crate the entry and add it in. 
+ */ + entry = (pANTLR3_HASH_ENTRY)ANTLR3_MALLOC((size_t)sizeof(ANTLR3_HASH_ENTRY)); + + if (entry == NULL) + { + return ANTLR3_ERR_NOMEM; + } + + entry->data = element; /* Install the data element supplied */ + entry->free = freeptr; /* Function that knows how to release the entry */ + entry->keybase.type = ANTLR3_HASH_TYPE_INT; /* Indicate the key type stored here for when we free */ + entry->keybase.key.iKey = key; /* Record the key value */ + entry->nextEntry = NULL; /* Ensure that the forward pointer ends the chain */ + + *newPointer = entry; /* Install the next entry in this bucket */ + + table->count++; + + return ANTLR3_SUCCESS; +} + + +/** Add the element pointer in to the table, based upon the + * hash of the provided key. + */ +static ANTLR3_INT32 +antlr3HashPut(pANTLR3_HASH_TABLE table, void * key, void * element, void (ANTLR3_CDECL *freeptr)(void *)) +{ + ANTLR3_UINT32 hash; + pANTLR3_HASH_BUCKET bucket; + pANTLR3_HASH_ENTRY entry; + pANTLR3_HASH_ENTRY * newPointer; + + /* First we need to know the hash of the provided key + */ + hash = antlr3Hash(key, (ANTLR3_UINT32)strlen((const char *)key)); + + /* Knowing the hash, we can find the bucket + */ + bucket = table->buckets + (hash % table->modulo); + + /* Knowign the bucket, we can traverse the entries until we + * we find a NULL pointer ofr we find that this is already + * in the table and duplicates were not allowed. + */ + newPointer = &bucket->entries; + + while (*newPointer != NULL) + { + /* The value at new pointer is pointing to an existing entry. + * If duplicates are allowed then we don't care what it is, but + * must reject this add if the key is the same as the one we are + * supplied with. 
+ */ + if (table->allowDups == ANTLR3_FALSE) + { + if (strcmp((const char*) key, (const char *)(*newPointer)->keybase.key.sKey) == 0) + { + return ANTLR3_ERR_HASHDUP; + } + } + + /* Point to the next entry pointer of the current entry we + * are traversing, if it is NULL we will create our new + * structure and point this to it. + */ + newPointer = &((*newPointer)->nextEntry); + } + + /* newPointer is now poiting at the pointer where we need to + * add our new entry, so let's crate the entry and add it in. + */ + entry = (pANTLR3_HASH_ENTRY)ANTLR3_MALLOC((size_t)sizeof(ANTLR3_HASH_ENTRY)); + + if (entry == NULL) + { + return ANTLR3_ERR_NOMEM; + } + + entry->data = element; /* Install the data element supplied */ + entry->free = freeptr; /* Function that knows how to release the entry */ + entry->keybase.type = ANTLR3_HASH_TYPE_STR; /* Indicate the key type stored here for free() */ + if (table->doStrdup == ANTLR3_TRUE) + { + entry->keybase.key.sKey = ANTLR3_STRDUP(key); /* Record the key value */ + } + else + { + entry->keybase.key.sKey = key; /* Record the key value */ + } + entry->nextEntry = NULL; /* Ensure that the forward pointer ends the chain */ + + *newPointer = entry; /* Install the next entry in this bucket */ + + table->count++; + + return ANTLR3_SUCCESS; +} + +/** \brief Creates an enumeration structure to traverse the hash table. + * + * \param table Table to enumerate + * \return Pointer to enumeration structure. 
+ */ +pANTLR3_HASH_ENUM +antlr3EnumNew (pANTLR3_HASH_TABLE table) +{ + pANTLR3_HASH_ENUM en; + + /* Allocate structure memory + */ + en = (pANTLR3_HASH_ENUM) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_HASH_ENUM)); + + /* Check that the allocation was good + */ + if (en == NULL) + { + return (pANTLR3_HASH_ENUM) ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + /* Initialize the start pointers + */ + en->table = table; + en->bucket = 0; /* First bucket */ + en->entry = en->table->buckets->entries; /* First entry to return */ + + /* Special case in that the first bucket may not have anything in it + * but the antlr3EnumNext() function expects that the en->entry is + * set to the next valid pointer. Hence if it is not a valid element + * pointer, attempt to find the next one that is, (table may be empty + * of course. + */ + if (en->entry == NULL) + { + antlr3EnumNextEntry(en); + } + + /* Install the interface + */ + en->free = antlr3EnumFree; + en->next = antlr3EnumNext; + + /* All is good + */ + return en; +} + +/** \brief Return the next entry in the hashtable being traversed by the supplied + * enumeration. + * + * \param[in] en Pointer to the enumeration tracking structure + * \param key Pointer to void pointer, where the key pointer is returned. + * \param data Pointer to void pointer where the data pointer is returned. + * \return + * - ANTLR3_SUCCESS if there was a next key + * - ANTLR3_FAIL if there were no more keys + * + * \remark + * No checking of input structure is performed! + */ +static int +antlr3EnumNext (pANTLR3_HASH_ENUM en, pANTLR3_HASH_KEY * key, void ** data) +{ + /* If the current entry is valid, then use it + */ + if (en->bucket >= en->table->modulo) + { + /* Already exhausted the table + */ + return ANTLR3_FAIL; + } + + /* Pointers are already set to the current entry to return, or + * we would not be at this point in the logic flow. 
+ */ + *key = &(en->entry->keybase); + *data = en->entry->data; + + /* Return pointers are set up, so now we move the element + * pointer to the next in the table (if any). + */ + antlr3EnumNextEntry(en); + + return ANTLR3_SUCCESS; +} + +/** \brief Local function to advance the entry pointer of an enumeration + * structure to the next valid entry (if there is one). + * + * \param[in] enum Pointer to ANTLR3 enumeration structure returned by antlr3EnumNew() + * + * \remark + * - The function always leaves the pointers pointing at a valid entry if there + * is one, so if the entry pointer is NULL when this function exits, there were + * no more entries in the table. + */ +static void +antlr3EnumNextEntry(pANTLR3_HASH_ENUM en) +{ + pANTLR3_HASH_BUCKET bucket; + + /* See if the current entry pointer is valid first of all + */ + if (en->entry != NULL) + { + /* Current entry was a valid point, see if there is another + * one in the chain. + */ + if (en->entry->nextEntry != NULL) + { + /* Next entry in the enumeration is just the next entry + * in the chain. + */ + en->entry = en->entry->nextEntry; + return; + } + } + + /* There were no more entries in the current bucket, if there are + * more buckets then chase them until we find an entry. + */ + en->bucket++; + + while (en->bucket < en->table->modulo) + { + /* There was one more bucket, see if it has any elements in it + */ + bucket = en->table->buckets + en->bucket; + + if (bucket->entries != NULL) + { + /* There was an entry in this bucket, so we can use it + * for the next entry in the enumeration. + */ + en->entry = bucket->entries; + return; + } + + /* There was nothing in the bucket we just examined, move to the + * next one. + */ + en->bucket++; + } + + /* Here we have exhausted all buckets and the enumeration pointer will + * have its bucket count = table->modulo which signifies that we are done. + */ +} + +/** \brief Frees up the memory structures that represent a hash table + * enumeration. 
+ * \param[in] enum Pointer to ANTLR3 enumeration structure returned by antlr3EnumNew() + */ +static void +antlr3EnumFree (pANTLR3_HASH_ENUM en) +{ + /* Nothing to check, we just free it. + */ + ANTLR3_FREE(en); +} + +/** Given an input key of arbitrary length, return a hash value of + * it. This can then be used (with suitable modulo) to index other + * structures. + */ +ANTLR3_API ANTLR3_UINT32 +antlr3Hash(void * key, ANTLR3_UINT32 keylen) +{ + /* Accumulate the hash value of the key + */ + ANTLR3_UINT32 hash; + pANTLR3_UINT8 keyPtr; + ANTLR3_UINT32 i1; + + hash = 0; + keyPtr = (pANTLR3_UINT8) key; + + /* Iterate the key and accumulate the hash + */ + while(keylen > 0) + { + hash = (hash << 4) + (*(keyPtr++)); + + if ((i1=hash&0xf0000000) != 0) + { + hash = hash ^ (i1 >> 24); + hash = hash ^ i1; + } + keylen--; + } + + return hash; +} + +ANTLR3_API pANTLR3_LIST +antlr3ListNew (ANTLR3_UINT32 sizeHint) +{ + pANTLR3_LIST list; + + /* Allocate memory + */ + list = (pANTLR3_LIST)ANTLR3_MALLOC((size_t)sizeof(ANTLR3_LIST)); + + if (list == NULL) + { + return (pANTLR3_LIST)ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + /* Now we need to add a new table + */ + list->table = antlr3HashTableNew(sizeHint); + + if (list->table == (pANTLR3_HASH_TABLE)ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM)) + { + return (pANTLR3_LIST)ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + /* Allocation was good, install interface + */ + list->free = antlr3ListFree; + list->del = antlr3ListDelete; + list->get = antlr3ListGet; + list->add = antlr3ListAdd; + list->remove = antlr3ListRemove; + list->put = antlr3ListPut; + list->size = antlr3ListSize; + + return list; +} + +static ANTLR3_UINT32 antlr3ListSize (pANTLR3_LIST list) +{ + return list->table->size(list->table); +} + +static void +antlr3ListFree (pANTLR3_LIST list) +{ + /* Free the hashtable that stores the list + */ + list->table->free(list->table); + + /* Free the allocation for the list itself + */ + ANTLR3_FREE(list); +} + +static void +antlr3ListDelete 
(pANTLR3_LIST list, ANTLR3_INTKEY key) +{ + list->table->delI(list->table, key); +} + +static void * +antlr3ListGet (pANTLR3_LIST list, ANTLR3_INTKEY key) +{ + return list->table->getI(list->table, key); +} + +/** Add the supplied element to the list, at the next available key + */ +static ANTLR3_INT32 antlr3ListAdd (pANTLR3_LIST list, void * element, void (ANTLR3_CDECL *freeptr)(void *)) +{ + ANTLR3_INTKEY key; + + key = list->table->size(list->table) + 1; + return list->put(list, key, element, freeptr); +} + +/** Remove from the list, but don't free the element, just send it back to the + * caller. + */ +static void * +antlr3ListRemove (pANTLR3_LIST list, ANTLR3_INTKEY key) +{ + pANTLR3_HASH_ENTRY entry; + + entry = list->table->removeI(list->table, key); + + if (entry != NULL) + { + return entry->data; + } + else + { + return NULL; + } +} + +static ANTLR3_INT32 +antlr3ListPut (pANTLR3_LIST list, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *)) +{ + return list->table->putI(list->table, key, element, freeptr); +} + +ANTLR3_API pANTLR3_STACK +antlr3StackNew (ANTLR3_UINT32 sizeHint) +{ + pANTLR3_STACK stack; + + /* Allocate memory + */ + stack = (pANTLR3_STACK)ANTLR3_MALLOC((size_t)sizeof(ANTLR3_STACK)); + + if (stack == NULL) + { + return (pANTLR3_STACK)ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + /* Now we need to add a new table + */ + stack->vector = antlr3VectorNew(sizeHint); + stack->top = NULL; + + if (stack->vector == (pANTLR3_VECTOR)ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM)) + { + return (pANTLR3_STACK)ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + /* Looks good, now add the interface + */ + stack->get = antlr3StackGet; + stack->free = antlr3StackFree; + stack->pop = antlr3StackPop; + stack->push = antlr3StackPush; + stack->size = antlr3StackSize; + stack->peek = antlr3StackPeek; + + return stack; +} + +static ANTLR3_UINT32 antlr3StackSize (pANTLR3_STACK stack) +{ + return stack->vector->count; +} + + +static void +antlr3StackFree (pANTLR3_STACK 
stack) +{ + /* Free the list that supports the stack + */ + stack->vector->free(stack->vector); + stack->vector = NULL; + stack->top = NULL; + + ANTLR3_FREE(stack); +} + +static void * +antlr3StackPop (pANTLR3_STACK stack) +{ + // Delete the element that is currently at the top of the stack + // + stack->vector->del(stack->vector, stack->vector->count - 1); + + // And get the element that is the now the top of the stack (if anything) + // NOTE! This is not quite like a 'real' stack, which would normally return you + // the current top of the stack, then remove it from the stack. + // TODO: Review this, it is correct for follow sets which is what this was done for + // but is not as obvious when using it as a 'real'stack. + // + stack->top = stack->vector->get(stack->vector, stack->vector->count - 1); + return stack->top; +} + +static void * +antlr3StackGet (pANTLR3_STACK stack, ANTLR3_INTKEY key) +{ + return stack->vector->get(stack->vector, (ANTLR3_UINT32)key); +} + +static void * +antlr3StackPeek (pANTLR3_STACK stack) +{ + return stack->top; +} + +static ANTLR3_BOOLEAN +antlr3StackPush (pANTLR3_STACK stack, void * element, void (ANTLR3_CDECL *freeptr)(void *)) +{ + stack->top = element; + return (ANTLR3_BOOLEAN)(stack->vector->add(stack->vector, element, freeptr)); +} + +ANTLR3_API pANTLR3_VECTOR +antlr3VectorNew (ANTLR3_UINT32 sizeHint) +{ + pANTLR3_VECTOR vector; + + + // Allocate memory for the vector structure itself + // + vector = (pANTLR3_VECTOR) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_VECTOR))); + + if (vector == NULL) + { + return (pANTLR3_VECTOR)ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + // Now fill in the defaults + // + antlr3SetVectorApi(vector, sizeHint); + + // And everything is hunky dory + // + return vector; +} + +ANTLR3_API void +antlr3SetVectorApi (pANTLR3_VECTOR vector, ANTLR3_UINT32 sizeHint) +{ + ANTLR3_UINT32 initialSize; + + // Allow vectors to be guessed by ourselves, so input size can be zero + // + if (sizeHint > 
ANTLR3_VECTOR_INTERNAL_SIZE) + { + initialSize = sizeHint; + } + else + { + initialSize = ANTLR3_VECTOR_INTERNAL_SIZE; + } + + if (sizeHint > ANTLR3_VECTOR_INTERNAL_SIZE) + { + vector->elements = (pANTLR3_VECTOR_ELEMENT)ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_VECTOR_ELEMENT) * initialSize)); + } + else + { + vector->elements = vector->internal; + } + + if (vector->elements == NULL) + { + ANTLR3_FREE(vector); + return; + } + + // Memory allocated successfully + // + vector->count = 0; // No entries yet of course + vector->elementsSize = initialSize; // Available entries + + // Now we can install the API + // + vector->add = antlr3VectorAdd; + vector->del = antlr3VectorDel; + vector->get = antlr3VectorGet; + vector->free = antlr3VectorFree; + vector->set = antlr3VectorSet; + vector->remove = antrl3VectorRemove; + vector->clear = antlr3VectorClear; + vector->size = antlr3VectorSize; + vector->swap = antlr3VectorSwap; + + // Assume that this is not a factory made vector + // + vector->factoryMade = ANTLR3_FALSE; +} + +// Clear the entries in a vector. +// Clearing the vector leaves its capacity the same but +// it walks the entries first to see if any of them +// have a free routine that must be called. +// +static void +antlr3VectorClear (pANTLR3_VECTOR vector) +{ + ANTLR3_UINT32 entry; + + // We must traverse every entry in the vector and if it has + // a pointer to a free function then we call it with the + // the entry pointer + // + for (entry = 0; entry < vector->count; entry++) + { + if (vector->elements[entry].freeptr != NULL) + { + vector->elements[entry].freeptr(vector->elements[entry].element); + } + vector->elements[entry].freeptr = NULL; + vector->elements[entry].element = NULL; + } + + // Having called any free pointers, we just reset the entry count + // back to zero. 
+ // + vector->count = 0; +} + +static +void ANTLR3_CDECL antlr3VectorFree (pANTLR3_VECTOR vector) +{ + ANTLR3_UINT32 entry; + + // We must traverse every entry in the vector and if it has + // a pointer to a free function then we call it with the + // the entry pointer + // + for (entry = 0; entry < vector->count; entry++) + { + if (vector->elements[entry].freeptr != NULL) + { + vector->elements[entry].freeptr(vector->elements[entry].element); + } + vector->elements[entry].freeptr = NULL; + vector->elements[entry].element = NULL; + } + + if (vector->factoryMade == ANTLR3_FALSE) + { + // The entries are freed, so free the element allocation + // + if (vector->elementsSize > ANTLR3_VECTOR_INTERNAL_SIZE) + { + ANTLR3_FREE(vector->elements); + } + vector->elements = NULL; + + // Finally, free the allocation for the vector itself + // + ANTLR3_FREE(vector); + } +} + +static void antlr3VectorDel (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry) +{ + // Check this is a valid request first + // + if (entry >= vector->count) + { + return; + } + + // Valid request, check for free pointer and call it if present + // + if (vector->elements[entry].freeptr != NULL) + { + vector->elements[entry].freeptr(vector->elements[entry].element); + vector->elements[entry].freeptr = NULL; + } + + if (entry == vector->count - 1) + { + // Ensure the pointer is never reused by accident, but otherwise just + // decrement the pointer. 
+ // + vector->elements[entry].element = NULL; + } + else + { + // Need to shuffle trailing pointers back over the deleted entry + // + ANTLR3_MEMMOVE(vector->elements + entry, vector->elements + entry + 1, sizeof(ANTLR3_VECTOR_ELEMENT) * (vector->count - entry - 1)); + } + + // One less entry in the vector now + // + vector->count--; +} + +static void * antlr3VectorGet (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry) +{ + // Ensure this is a valid request + // + if (entry < vector->count) + { + return vector->elements[entry].element; + } + else + { + // I know nothing, Mr. Fawlty! + // + return NULL; + } +} + +/// Remove the entry from the vector, but do not free any entry, even if it has +/// a free pointer. +/// +static void * antrl3VectorRemove (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry) +{ + void * element; + + // Check this is a valid request first + // + if (entry >= vector->count) + { + return NULL; + } + + // Valid request, return the sorted pointer + // + + element = vector->elements[entry].element; + + if (entry == vector->count - 1) + { + // Ensure the pointer is never reused by accident, but otherwise just + // decrement the pointer. + /// + vector->elements[entry].element = NULL; + vector->elements[entry].freeptr = NULL; + } + else + { + // Need to shuffle trailing pointers back over the deleted entry + // + ANTLR3_MEMMOVE(vector->elements + entry, vector->elements + entry + 1, sizeof(ANTLR3_VECTOR_ELEMENT) * (vector->count - entry - 1)); + } + + // One less entry in the vector now + // + vector->count--; + + return element; +} + +static void +antlr3VectorResize (pANTLR3_VECTOR vector, ANTLR3_UINT32 hint) +{ + ANTLR3_UINT32 newSize; + + // Need to resize the element pointers. We double the allocation + // we already have unless asked for a specific increase. 
+ // + if (hint == 0 || hint < vector->elementsSize) + { + newSize = vector->elementsSize * 2; + } + else + { + newSize = hint * 2; + } + + // Now we know how many we need, so we see if we have just expanded + // past the built in vector elements or were already past that + // + if (vector->elementsSize > ANTLR3_VECTOR_INTERNAL_SIZE) + { + // We were already larger than the internal size, so we just + // use realloc so that the pointers are copied for us + // + vector->elements = (pANTLR3_VECTOR_ELEMENT)ANTLR3_REALLOC(vector->elements, (sizeof(ANTLR3_VECTOR_ELEMENT)* newSize)); + } + else + { + // The current size was less than or equal to the internal array size and as we always start + // with a size that is at least the maximum internal size, then we must need to allocate new memory + // for external pointers. We don't want to take the time to calculate if a requested element + // is part of the internal or external entries, so we copy the internal ones to the new space + // + vector->elements = (pANTLR3_VECTOR_ELEMENT)ANTLR3_MALLOC((sizeof(ANTLR3_VECTOR_ELEMENT)* newSize)); + ANTLR3_MEMCPY(vector->elements, vector->internal, ANTLR3_VECTOR_INTERNAL_SIZE * sizeof(ANTLR3_VECTOR_ELEMENT)); + } + + vector->elementsSize = newSize; +} + +/// Add the supplied pointer and freeing function pointer to the list, +/// expanding the vector if needed. +/// +static ANTLR3_UINT32 antlr3VectorAdd (pANTLR3_VECTOR vector, void * element, void (ANTLR3_CDECL *freeptr)(void *)) +{ + // Do we need to resize the vector table? + // + if (vector->count == vector->elementsSize) + { + antlr3VectorResize(vector, 0); // Give no hint, we let it add 1024 or double it + } + + // Insert the new entry + // + vector->elements[vector->count].element = element; + vector->elements[vector->count].freeptr = freeptr; + + vector->count++; // One more element counted + + return (ANTLR3_UINT32)(vector->count); + +} + +/// Replace the element at the specified entry point with the supplied +/// entry. 
+/// +static ANTLR3_UINT32 +antlr3VectorSet (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry, void * element, void (ANTLR3_CDECL *freeptr)(void *), ANTLR3_BOOLEAN freeExisting) +{ + + // If the vector is currently not big enough, then we expand it + // + if (entry >= vector->elementsSize) + { + antlr3VectorResize(vector, entry); // We will get at least this many + } + + // Valid request, replace the current one, freeing any prior entry if told to + // + if ( entry < vector->count // If actually replacing an element + && freeExisting // And told to free any existing element + && vector->elements[entry].freeptr != NULL // And the existing element has a free pointer + ) + { + vector->elements[entry].freeptr(vector->elements[entry].element); + } + + // Install the new pointers + // + vector->elements[entry].freeptr = freeptr; + vector->elements[entry].element = element; + + if (entry >= vector->count) + { + vector->count = entry + 1; + } + return (ANTLR3_UINT32)(entry); // Indicates the replacement was successful + +} + +/// Replace the element at the specified entry point with the supplied +/// entry. 
+/// +static ANTLR3_BOOLEAN +antlr3VectorSwap (pANTLR3_VECTOR vector, ANTLR3_UINT32 entry1, ANTLR3_UINT32 entry2) +{ + + void * tempEntry; + void (ANTLR3_CDECL *freeptr)(void *); + + // If the vector is currently not big enough, then we do nothing + // + if (entry1 >= vector->elementsSize || entry2 >= vector->elementsSize) + { + return ANTLR3_FALSE; + } + + // Valid request, swap them + // + tempEntry = vector->elements[entry1].element; + freeptr = vector->elements[entry1].freeptr; + + // Install the new pointers + // + vector->elements[entry1].freeptr = vector->elements[entry2].freeptr; + vector->elements[entry1].element = vector->elements[entry2].element; + + vector->elements[entry2].freeptr = freeptr; + vector->elements[entry2].element = tempEntry; + + return ANTLR3_TRUE; + +} + +static ANTLR3_UINT32 antlr3VectorSize (pANTLR3_VECTOR vector) +{ + return vector->count; +} + +#ifdef ANTLR3_WINDOWS +#pragma warning (push) +#pragma warning (disable : 4100) +#endif +/// Vector factory creation +/// +ANTLR3_API pANTLR3_VECTOR_FACTORY +antlr3VectorFactoryNew (ANTLR3_UINT32 sizeHint) +{ + pANTLR3_VECTOR_FACTORY factory; + + // Allocate memory for the factory + // + factory = (pANTLR3_VECTOR_FACTORY)ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_VECTOR_FACTORY))); + + if (factory == NULL) + { + return NULL; + } + + // Factory memory is good, so create a new vector pool + // + factory->pools = NULL; + factory->thisPool = -1; + + newPool(factory); + + // Initialize the API, ignore the hint as this algorithm does + // a better job really. 
+ // + antlr3SetVectorApi(&(factory->unTruc), ANTLR3_VECTOR_INTERNAL_SIZE); + + factory->unTruc.factoryMade = ANTLR3_TRUE; + + // Install the factory API + // + factory->close = closeVectorFactory; + factory->newVector = newVector; + factory->returnVector = returnVector; + + // Create a stack to accumulate reusable vectors + // + factory->freeStack = antlr3StackNew(16); + return factory; +} +#ifdef ANTLR3_WINDOWS +#pragma warning (pop) +#endif + +static void +returnVector (pANTLR3_VECTOR_FACTORY factory, pANTLR3_VECTOR vector) +{ + // First we need to clear out anything that is still in the vector + // + vector->clear(vector); + + // We have a free stack available so we can add the vector we were + // given into the free chain. The vector has to have come from this + // factory, so we already know how to release its memory when it + // dies by virtue of the factory being closed. + // + factory->freeStack->push(factory->freeStack, vector, NULL); + + // TODO: remove this line once happy printf("Returned vector %08X to the pool, stack size is %d\n", vector, factory->freeStack->size(factory->freeStack)); +} + +static void +newPool(pANTLR3_VECTOR_FACTORY factory) +{ + /* Increment factory count + */ + factory->thisPool++; + + /* Ensure we have enough pointers allocated + */ + factory->pools = (pANTLR3_VECTOR *) + ANTLR3_REALLOC( (void *)factory->pools, /* Current pools pointer (starts at NULL) */ + (ANTLR3_UINT32)((factory->thisPool + 1) * sizeof(pANTLR3_VECTOR *)) /* Memory for new pool pointers */ + ); + + /* Allocate a new pool for the factory + */ + factory->pools[factory->thisPool] = + (pANTLR3_VECTOR) + ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_VECTOR) * ANTLR3_FACTORY_VPOOL_SIZE)); + + + /* Reset the counters + */ + factory->nextVector = 0; + + /* Done + */ + return; +} + +static void +closeVectorFactory (pANTLR3_VECTOR_FACTORY factory) +{ + pANTLR3_VECTOR pool; + ANTLR3_INT32 poolCount; + ANTLR3_UINT32 limit; + ANTLR3_UINT32 vector; + pANTLR3_VECTOR check; + + // 
First see if we have a free chain stack to release? + // + if (factory->freeStack != NULL) + { + factory->freeStack->free(factory->freeStack); + } + + /* We iterate the vector pools one at a time + */ + for (poolCount = 0; poolCount <= factory->thisPool; poolCount++) + { + /* Pointer to current pool + */ + pool = factory->pools[poolCount]; + + /* Work out how many tokens we need to check in this pool. + */ + limit = (poolCount == factory->thisPool ? factory->nextVector : ANTLR3_FACTORY_VPOOL_SIZE); + + /* Marginal condition, we might be at the start of a brand new pool + * where the nextToken is 0 and nothing has been allocated. + */ + if (limit > 0) + { + /* We have some vectors allocated from this pool + */ + for (vector = 0; vector < limit; vector++) + { + /* Next one in the chain + */ + check = pool + vector; + + // Call the free function on each of the vectors in the pool, + // which in turn will cause any elements it holds that also have a free + // pointer to be freed. However, because any vector may be in any other + // vector, we don't free the element allocations yet. We do that in a + // a specific pass, coming up next. The vector free function knows that + // this is a factory allocated pool vector and so it won't free things it + // should not. + // + check->free(check); + } + } + } + + /* We iterate the vector pools one at a time once again, but this time + * we are going to free up any allocated element pointers. Note that we are doing this + * so that we do not try to release vectors twice. When building ASTs we just copy + * the vectors all over the place and they may be embedded in this vector pool + * numerous times. + */ + for (poolCount = 0; poolCount <= factory->thisPool; poolCount++) + { + /* Pointer to current pool + */ + pool = factory->pools[poolCount]; + + /* Work out how many tokens we need to check in this pool. + */ + limit = (poolCount == factory->thisPool ? 
factory->nextVector : ANTLR3_FACTORY_VPOOL_SIZE); + + /* Marginal condition, we might be at the start of a brand new pool + * where the nextToken is 0 and nothing has been allocated. + */ + if (limit > 0) + { + /* We have some vectors allocated from this pool + */ + for (vector = 0; vector < limit; vector++) + { + /* Next one in the chain + */ + check = pool + vector; + + // Anything in here should be factory made, but we do this just + // to triple check. We just free up the elements if they were + // allocated beyond the internal size. + // + if (check->factoryMade == ANTLR3_TRUE && check->elementsSize > ANTLR3_VECTOR_INTERNAL_SIZE) + { + ANTLR3_FREE(check->elements); + check->elements = NULL; + } + } + } + + // We can now free this pool allocation as we have called free on every element in every vector + // and freed any memory for pointers the grew beyond the internal size limit. + // + ANTLR3_FREE(factory->pools[poolCount]); + factory->pools[poolCount] = NULL; + } + + /* All the pools are deallocated we can free the pointers to the pools + * now. 
+ */ + ANTLR3_FREE(factory->pools); + + /* Finally, we can free the space for the factory itself + */ + ANTLR3_FREE(factory); + +} + +static pANTLR3_VECTOR +newVector(pANTLR3_VECTOR_FACTORY factory) +{ + pANTLR3_VECTOR vector; + + // If we have anything on the re claim stack, reuse it + // + vector = factory->freeStack->peek(factory->freeStack); + + if (vector != NULL) + { + // Cool we got something we could reuse + // + factory->freeStack->pop(factory->freeStack); + + // TODO: remove this line once happy printf("Reused vector %08X from stack, size is now %d\n", vector, factory->freeStack->size(factory->freeStack)); + return vector; + + } + + // See if we need a new vector pool before allocating a new + // one + // + if (factory->nextVector >= ANTLR3_FACTORY_VPOOL_SIZE) + { + // We ran out of vectors in the current pool, so we need a new pool + // + newPool(factory); + } + + // Assuming everything went well (we are trying for performance here so doing minimal + // error checking. Then we can work out what the pointer is to the next vector. + // + vector = factory->pools[factory->thisPool] + factory->nextVector; + factory->nextVector++; + + // We have our token pointer now, so we can initialize it to the predefined model. + // + antlr3SetVectorApi(vector, ANTLR3_VECTOR_INTERNAL_SIZE); + vector->factoryMade = ANTLR3_TRUE; + + // We know that the pool vectors are created at the default size, which means they + // will start off using their internal entry pointers. We must intialize our pool vector + // to point to its own internal entry table and not the pre-made one. + // + vector->elements = vector->internal; + + // TODO: remove this line once happy printf("Used a new vector at %08X from the pools as nothing on the reusue stack\n", vector); + + // And we are done + // + return vector; +} + +/** Array of left most significant bit positions for an 8 bit + * element provides an efficient way to find the highest bit + * that is set in an n byte value (n>0). 
Assuming the values will all hit the data cache, + * coding without conditional elements should allow branch + * prediction to work well and of course a parallel instruction cache + * will whip through this. Otherwise we must loop shifting a one + * bit and masking. The values we tend to be placing in out integer + * patricia trie are usually a lot lower than the 64 bits we + * allow for the key allows. Hence there is a lot of redundant looping and + * shifting in a while loop. Whereas, the lookup table is just + * a few ands and indirect lookups, while testing for 0. This + * is likely to be done in parallel on many processors available + * when I wrote this. If this code survives as long as yacc, then + * I may already be dead by the time you read this and maybe there is + * a single machine instruction to perform the operation. What + * else are you going to do with all those transistors? Jim 2007 + * + * The table is probably obvious but it is just the number 0..7 + * of the MSB in each integer value 0..256 + */ +static ANTLR3_UINT8 bitIndex[256] = +{ + 0, // 0 - Just for padding + 0, // 1 + 1, 1, // 2..3 + 2, 2, 2, 2, // 4..7 + 3, 3, 3, 3, 3, 3, 3, 3, // 8+ + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 16+ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 32+ + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 64+ + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 128+ + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +}; + +/** Rather than use the bit index 
of a trie node to shift + * 0x01 left that many times, then & with the result, it is + * faster to use the bit index as an index into this table + * which holds precomputed masks for any of the 64 bits + * we need to mask off singly. The data values will stay in + * cache while ever a trie is in heavy use, such as in + * memoization. It is also pretty enough to be ASCII art. + */ +static ANTLR3_UINT64 bitMask[64] = +{ + 0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000004ULL, 0x0000000000000008ULL, + 0x0000000000000010ULL, 0x0000000000000020ULL, 0x0000000000000040ULL, 0x0000000000000080ULL, + 0x0000000000000100ULL, 0x0000000000000200ULL, 0x0000000000000400ULL, 0x0000000000000800ULL, + 0x0000000000001000ULL, 0x0000000000002000ULL, 0x0000000000004000ULL, 0x0000000000008000ULL, + 0x0000000000010000ULL, 0x0000000000020000ULL, 0x0000000000040000ULL, 0x0000000000080000ULL, + 0x0000000000100000ULL, 0x0000000000200000ULL, 0x0000000000400000ULL, 0x0000000000800000ULL, + 0x0000000001000000ULL, 0x0000000002000000ULL, 0x0000000004000000ULL, 0x0000000008000000ULL, + 0x0000000010000000ULL, 0x0000000020000000ULL, 0x0000000040000000ULL, 0x0000000080000000ULL, + 0x0000000100000000ULL, 0x0000000200000000ULL, 0x0000000400000000ULL, 0x0000000800000000ULL, + 0x0000001000000000ULL, 0x0000002000000000ULL, 0x0000004000000000ULL, 0x0000008000000000ULL, + 0x0000010000000000ULL, 0x0000020000000000ULL, 0x0000040000000000ULL, 0x0000080000000000ULL, + 0x0000100000000000ULL, 0x0000200000000000ULL, 0x0000400000000000ULL, 0x0000800000000000ULL, + 0x0001000000000000ULL, 0x0002000000000000ULL, 0x0004000000000000ULL, 0x0008000000000000ULL, + 0x0010000000000000ULL, 0x0020000000000000ULL, 0x0040000000000000ULL, 0x0080000000000000ULL, + 0x0100000000000000ULL, 0x0200000000000000ULL, 0x0400000000000000ULL, 0x0800000000000000ULL, + 0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL, 0x8000000000000000ULL +}; + +/* INT TRIE Implementation of depth 64 bits, being the number of 
bits + * in a 64 bit integer. + */ + +pANTLR3_INT_TRIE +antlr3IntTrieNew(ANTLR3_UINT32 depth) +{ + pANTLR3_INT_TRIE trie; + + trie = (pANTLR3_INT_TRIE) ANTLR3_CALLOC(1, sizeof(ANTLR3_INT_TRIE)); /* Base memory required */ + + if (trie == NULL) + { + return (pANTLR3_INT_TRIE) ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + /* Now we need to allocate the root node. This makes it easier + * to use the tree as we don't have to do anything special + * for the root node. + */ + trie->root = (pANTLR3_INT_TRIE_NODE) ANTLR3_CALLOC(1, sizeof(ANTLR3_INT_TRIE)); + + if (trie->root == NULL) + { + ANTLR3_FREE(trie); + return (pANTLR3_INT_TRIE) ANTLR3_FUNC_PTR(ANTLR3_ERR_NOMEM); + } + + trie->add = intTrieAdd; + trie->del = intTrieDel; + trie->free = intTrieFree; + trie->get = intTrieGet; + + /* Now we seed the root node with the index being the + * highest left most bit we want to test, which limits the + * keys in the trie. This is the trie 'depth'. The limit for + * this implementation is 63 (bits 0..63). + */ + trie->root->bitNum = depth; + + /* And as we have nothing in here yet, we set both child pointers + * of the root node to point back to itself. + */ + trie->root->leftN = trie->root; + trie->root->rightN = trie->root; + trie->count = 0; + + /* Finally, note that the key for this root node is 0 because + * we use calloc() to initialise it. + */ + + return trie; +} + +/** Search the int Trie and return a pointer to the first bucket indexed + * by the key if it is contained in the trie, otherwise NULL. + */ +static pANTLR3_TRIE_ENTRY +intTrieGet (pANTLR3_INT_TRIE trie, ANTLR3_INTKEY key) +{ + pANTLR3_INT_TRIE_NODE thisNode; + pANTLR3_INT_TRIE_NODE nextNode; + + if (trie->count == 0) + { + return NULL; /* Nothing in this trie yet */ + } + /* Starting at the root node in the trie, compare the bit index + * of the current node with its next child node (starts left from root). 
+ * When the bit index of the child node is greater than the bit index of the current node + * then by definition (as the bit index decreases as we descent the trie) + * we have reached a 'backward' pointer. A backward pointer means we + * have reached the only node that can be reached by the bits given us so far + * and it must either be the key we are looking for, or if not then it + * means the entry was not in the trie, and we return NULL. A backward pointer + * points back in to the tree structure rather than down (deeper) within the + * tree branches. + */ + thisNode = trie->root; /* Start at the root node */ + nextNode = thisNode->leftN; /* Examine the left node from the root */ + + /* While we are descending the tree nodes... + */ + while (thisNode->bitNum > nextNode->bitNum) + { + /* Next node now becomes the new 'current' node + */ + thisNode = nextNode; + + /* We now test the bit indicated by the bitmap in the next node + * in the key we are searching for. The new next node is the + * right node if that bit is set and the left node it is not. + */ + if (key & bitMask[nextNode->bitNum]) + { + nextNode = nextNode->rightN; /* 1 is right */ + } + else + { + nextNode = nextNode->leftN; /* 0 is left */ + } + } + + /* Here we have reached a node where the bitMap index is lower than + * its parent. This means it is pointing backward in the tree and + * must therefore be a terminal node, being the only point than can + * be reached with the bits seen so far. It is either the actual key + * we wanted, or if that key is not in the trie it is another key + * that is currently the only one that can be reached by those bits. + * That situation would obviously change if the key was to be added + * to the trie. + * + * Hence it only remains to test whether this is actually the key or not. 
+ */ + if (nextNode->key == key) + { + /* This was the key, so return the entry pointer + */ + return nextNode->buckets; + } + else + { + return NULL; /* That key is not in the trie (note that we set the pointer to -1 if no payload) */ + } +} + + +static ANTLR3_BOOLEAN +intTrieDel (pANTLR3_INT_TRIE trie, ANTLR3_INTKEY key) +{ + pANTLR3_INT_TRIE_NODE p; + + p=trie->root; + key = key; + + return ANTLR3_FALSE; +} + +/** Add an entry into the INT trie. + * Basically we descend the trie as we do when searching it, which will + * locate the only node in the trie that can be reached by the bit pattern of the + * key. If the key is actually at that node, then if the trie accepts duplicates + * we add the supplied data in a new chained bucket to that data node. If it does + * not accept duplicates then we merely return FALSE in case the caller wants to know + * whether the key was already in the trie. + * If the node we locate is not the key we are looking to add, then we insert a new node + * into the trie with a bit index of the leftmost differing bit and the left or right + * node pointing to itself or the data node we are inserting 'before'. + */ +static ANTLR3_BOOLEAN +intTrieAdd (pANTLR3_INT_TRIE trie, ANTLR3_INTKEY key, ANTLR3_UINT32 type, ANTLR3_INTKEY intVal, void * data, void (ANTLR3_CDECL *freeptr)(void *)) +{ + pANTLR3_INT_TRIE_NODE thisNode; + pANTLR3_INT_TRIE_NODE nextNode; + pANTLR3_INT_TRIE_NODE entNode; + ANTLR3_UINT32 depth; + pANTLR3_TRIE_ENTRY newEnt; + pANTLR3_TRIE_ENTRY nextEnt; + ANTLR3_INTKEY xorKey; + + /* Cache the bit depth of this trie, which is always the highest index, + * which is in the root node + */ + depth = trie->root->bitNum; + + thisNode = trie->root; /* Start with the root node */ + nextNode = trie->root->leftN; /* And assume we start to the left */ + + /* Now find the only node that can be currently reached by the bits in the + * key we are being asked to insert. 
+ */ + while (thisNode->bitNum > nextNode->bitNum) + { + /* Still descending the structure, next node becomes current. + */ + thisNode = nextNode; + + if (key & bitMask[nextNode->bitNum]) + { + /* Bit at the required index was 1, so travers the right node from here + */ + nextNode = nextNode->rightN; + } + else + { + /* Bit at the required index was 0, so we traverse to the left + */ + nextNode = nextNode->leftN; + } + } + /* Here we have located the only node that can be reached by the + * bits in the requested key. It could in fact be that key or the node + * we need to use to insert the new key. + */ + if (nextNode->key == key) + { + /* We have located an exact match, but we will only append to the bucket chain + * if this trie accepts duplicate keys. + */ + if (trie->allowDups ==ANTLR3_TRUE) + { + /* Yes, we are accepting duplicates + */ + newEnt = (pANTLR3_TRIE_ENTRY)ANTLR3_CALLOC(1, sizeof(ANTLR3_TRIE_ENTRY)); + + if (newEnt == NULL) + { + /* Out of memory, all we can do is return the fact that the insert failed. + */ + return ANTLR3_FALSE; + } + + /* Otherwise insert this in the chain + */ + newEnt->type = type; + newEnt->freeptr = freeptr; + if (type == ANTLR3_HASH_TYPE_STR) + { + newEnt->data.ptr = data; + } + else + { + newEnt->data.intVal = intVal; + } + + /* We want to be able to traverse the stored elements in the order that they were + * added as duplicate keys. We might need to revise this opinion if we end up having many duplicate keys + * as perhaps reverse order is just as good, so long as it is ordered. + */ + nextEnt = nextNode->buckets; + while (nextEnt->next != NULL) + { + nextEnt = nextEnt->next; + } + nextEnt->next = newEnt; + + trie->count++; + return ANTLR3_TRUE; + } + else + { + /* We found the key is already there and we are not allowed duplicates in this + * trie. 
+ */ + return ANTLR3_FALSE; + } + } + + /* Here we have discovered the only node that can be reached by the bits in the key + * but we have found that this node is not the key we need to insert. We must find the + * the leftmost bit by which the current key for that node and the new key we are going + * to insert, differ. While this nested series of ifs may look a bit strange, experimentation + * showed that it allows a machine code path that works well with predicated execution + */ + xorKey = (key ^ nextNode->key); /* Gives 1 bits only where they differ then we find the left most 1 bit*/ + + /* Most common case is a 32 bit key really + */ +#ifdef ANTLR3_USE_64BIT + if (xorKey & 0xFFFFFFFF00000000) + { + if (xorKey & 0xFFFF000000000000) + { + if (xorKey & 0xFF00000000000000) + { + depth = 56 + bitIndex[((xorKey & 0xFF00000000000000)>>56)]; + } + else + { + depth = 48 + bitIndex[((xorKey & 0x00FF000000000000)>>48)]; + } + } + else + { + if (xorKey & 0x0000FF0000000000) + { + depth = 40 + bitIndex[((xorKey & 0x0000FF0000000000)>>40)]; + } + else + { + depth = 32 + bitIndex[((xorKey & 0x000000FF00000000)>>32)]; + } + } + } + else +#endif + { + if (xorKey & 0x00000000FFFF0000) + { + if (xorKey & 0x00000000FF000000) + { + depth = 24 + bitIndex[((xorKey & 0x00000000FF000000)>>24)]; + } + else + { + depth = 16 + bitIndex[((xorKey & 0x0000000000FF0000)>>16)]; + } + } + else + { + if (xorKey & 0x000000000000FF00) + { + depth = 8 + bitIndex[((xorKey & 0x0000000000000FF00)>>8)]; + } + else + { + depth = bitIndex[xorKey & 0x00000000000000FF]; + } + } + } + + /* We have located the leftmost differing bit, indicated by the depth variable. So, we know what + * bit index we are to insert the new entry at. 
There are two cases, being where the two keys + * differ at a bit position that is not currently part of the bit testing, where they differ on a bit + * that is currently being skipped in the indexed comparisons, and where they differ on a bit + * that is merely lower down in the current bit search. If the bit index went bit 4, bit 2 and they differ + * at bit 3, then we have the "skipped" bit case. But if that chain was Bit 4, Bit 2 and they differ at bit 1 + * then we have the easy bit <pun>. + * + * So, set up to descend the tree again, but this time looking for the insert point + * according to whether we skip the bit that differs or not. + */ + thisNode = trie->root; + entNode = trie->root->leftN; + + /* Note the slight difference in the checks here to cover both cases + */ + while (thisNode->bitNum > entNode->bitNum && entNode->bitNum > depth) + { + /* Still descending the structure, next node becomes current. + */ + thisNode = entNode; + + if (key & bitMask[entNode->bitNum]) + { + /* Bit at the required index was 1, so traverse the right node from here + */ + entNode = entNode->rightN; + } + else + { + /* Bit at the required index was 0, so we traverse to the left + */ + entNode = entNode->leftN; + } + } + + /* We have located the correct insert point for this new key, so we need + * to allocate our entry and insert it etc. + */ + nextNode = (pANTLR3_INT_TRIE_NODE)ANTLR3_CALLOC(1, sizeof(ANTLR3_INT_TRIE_NODE)); + if (nextNode == NULL) + { + /* All that work and no memory - bummer. + */ + return ANTLR3_FALSE; + } + + /* Build a new entry block for the new node + */ + newEnt = (pANTLR3_TRIE_ENTRY)ANTLR3_CALLOC(1, sizeof(ANTLR3_TRIE_ENTRY)); + + if (newEnt == NULL) + { + /* Out of memory, all we can do is return the fact that the insert failed. 
+ */ + return ANTLR3_FALSE; + } + + /* Otherwise enter this in our new node + */ + newEnt->type = type; + newEnt->freeptr = freeptr; + if (type == ANTLR3_HASH_TYPE_STR) + { + newEnt->data.ptr = data; + } + else + { + newEnt->data.intVal = intVal; + } + /* Install it + */ + nextNode->buckets = newEnt; + nextNode->key = key; + nextNode->bitNum = depth; + + /* Work out the right and left pointers for this new node, which involve + * terminating with the current found node either right or left according + * to whether the current index bit is 1 or 0 + */ + if (key & bitMask[depth]) + { + nextNode->leftN = entNode; /* Terminates at previous position */ + nextNode->rightN = nextNode; /* Terminates with itself */ + } + else + { + nextNode->rightN = entNode; /* Terminates at previous position */ + nextNode->leftN = nextNode; /* Terminates with itself */ + } + + /* Finally, we need to change the pointers at the node we located + * for inserting. If the key bit at its index is set then the right + * pointer for that node becomes the newly created node, otherwise the left + * pointer does. + */ + if (key & bitMask[thisNode->bitNum] ) + { + thisNode->rightN = nextNode; + } + else + { + thisNode->leftN = nextNode; + } + + /* Et voila + */ + trie->count++; + return ANTLR3_TRUE; + +} +/** Release memory allocated to this tree. + * Basic algorithm is that we do a depth first left descent and free + * up any nodes that are not backward pointers. + */ +static void +freeIntNode(pANTLR3_INT_TRIE_NODE node) +{ + pANTLR3_TRIE_ENTRY thisEntry; + pANTLR3_TRIE_ENTRY nextEntry; + + /* If this node has a left pointer that is not a back pointer + * then recursively call to free this + */ + if (node->bitNum > node->leftN->bitNum) + { + /* We have a left node that needs descending, so do it. 
+ */ + freeIntNode(node->leftN); + } + + /* The left nodes from here should now be dealt with, so + * we need to descend any right nodes that are not back pointers + */ + if (node->bitNum > node->rightN->bitNum) + { + /* There are some right nodes to descend and deal with. + */ + freeIntNode(node->rightN); + } + + /* Now all the children are dealt with, we can destroy + * this node too + */ + thisEntry = node->buckets; + + while (thisEntry != NULL) + { + nextEntry = thisEntry->next; + + /* Do we need to call a custom free pointer for this string entry? + */ + if (thisEntry->type == ANTLR3_HASH_TYPE_STR && thisEntry->freeptr != NULL) + { + thisEntry->freeptr(thisEntry->data.ptr); + } + + /* Now free the data for this bucket entry + */ + ANTLR3_FREE(thisEntry); + thisEntry = nextEntry; /* See if there are any more to free */ + } + + /* The bucket entry is now gone, so we can free the memory for + * the entry itself. + */ + ANTLR3_FREE(node); + + /* And that should be it for everything under this node and itself + */ +} + +/** Called to free all nodes and the structure itself. + */ +static void +intTrieFree (pANTLR3_INT_TRIE trie) +{ + /* Descend from the root and free all the nodes + */ + freeIntNode(trie->root); + + /* the nodes are all gone now, so we need only free the memory + * for the structure itself + */ + ANTLR3_FREE(trie); +} + + +/** + * Allocate and initialize a new ANTLR3 topological sorter, which can be + * used to define edges that identify numerical node indexes that depend on other + * numerical node indexes, which can then be sorted topologically such that + * any node is sorted after all its dependent nodes. 
+ * + * Use: + * + * /verbatim + + pANTLR3_TOPO topo; + topo = antlr3NewTopo(); + + if (topo == NULL) { out of memory } + + topo->addEdge(topo, 3, 0); // Node 3 depends on node 0 + topo->addEdge(topo, 0, 1); // Node - depends on node 1 + topo->sortVector(topo, myVector); // Sort the vector in place (node numbers are the vector entry numbers) + + * /verbatim + */ +ANTLR3_API pANTLR3_TOPO +antlr3TopoNew() +{ + pANTLR3_TOPO topo = (pANTLR3_TOPO)ANTLR3_MALLOC(sizeof(ANTLR3_TOPO)); + + if (topo == NULL) + { + return NULL; + } + + // Initialize variables + // + + topo->visited = NULL; // Don't know how big it is yet + topo->limit = 1; // No edges added yet + topo->edges = NULL; // No edges added yet + topo->sorted = NULL; // Nothing sorted at the start + topo->cycle = NULL; // No cycles at the start + topo->cycleMark = 0; // No cycles at the start + topo->hasCycle = ANTLR3_FALSE; // No cycle at the start + + // API + // + topo->addEdge = addEdge; + topo->sortToArray = sortToArray; + topo->sortVector = sortVector; + topo->free = freeTopo; + + return topo; +} +// Topological sorter +// +static void +addEdge (pANTLR3_TOPO topo, ANTLR3_UINT32 edge, ANTLR3_UINT32 dependency) +{ + ANTLR3_UINT32 i; + ANTLR3_UINT32 maxEdge; + pANTLR3_BITSET edgeDeps; + + if (edge>dependency) + { + maxEdge = edge; + } + else + { + maxEdge = dependency; + } + // We need to add an edge to says that the node indexed by 'edge' is + // dependent on the node indexed by 'dependency' + // + + // First see if we have enough room in the edges array to add the edge? 
+ // + if (topo->edges == NULL) + { + // We don't have any edges yet, so create an array to hold them + // + topo->edges = ANTLR3_CALLOC(sizeof(pANTLR3_BITSET) * (maxEdge + 1), 1); + if (topo->edges == NULL) + { + return; + } + + // Set the limit to what we have now + // + topo->limit = maxEdge + 1; + } + else if (topo->limit <= maxEdge) + { + // WE have some edges but not enough + // + topo->edges = ANTLR3_REALLOC(topo->edges, sizeof(pANTLR3_BITSET) * (maxEdge + 1)); + if (topo->edges == NULL) + { + return; + } + + // Initialize the new bitmaps to ;indicate we have no edges defined yet + // + for (i = topo->limit; i <= maxEdge; i++) + { + *((topo->edges) + i) = NULL; + } + + // Set the limit to what we have now + // + topo->limit = maxEdge + 1; + } + + // If the edge was flagged as depending on itself, then we just + // do nothing as it means this routine was just called to add it + // in to the list of nodes. + // + if (edge == dependency) + { + return; + } + + // Pick up the bit map for the requested edge + // + edgeDeps = *((topo->edges) + edge); + + if (edgeDeps == NULL) + { + // No edges are defined yet for this node + // + edgeDeps = antlr3BitsetNew(0); + *((topo->edges) + edge) = edgeDeps; + if (edgeDeps == NULL ) + { + return; // Out of memory + } + } + + // Set the bit in the bitmap that corresponds to the requested + // dependency. + // + edgeDeps->add(edgeDeps, dependency); + + // And we are all set + // + return; +} + + +/** + * Given a starting node, descend its dependent nodes (ones that it has edges + * to) until we find one without edges. Having found a node without edges, we have + * discovered the bottom of a depth first search, which we can then ascend, adding + * the nodes in order from the bottom, which gives us the dependency order. 
+ */ +static void +DFS(pANTLR3_TOPO topo, ANTLR3_UINT32 node) +{ + pANTLR3_BITSET edges; + + // Guard against a revisit and check for cycles + // + if (topo->hasCycle == ANTLR3_TRUE) + { + return; // We don't do anything else if we found a cycle + } + + if (topo->visited->isMember(topo->visited, node)) + { + // Check to see if we found a cycle. To do this we search the + // current cycle stack and see if we find this node already in the stack. + // + ANTLR3_UINT32 i; + + for (i=0; i<topo->cycleMark; i++) + { + if (topo->cycle[i] == node) + { + // Stop! We found a cycle in the input, so rejig the cycle + // stack so that it only contains the cycle and set the cycle flag + // which will tell the caller what happened + // + ANTLR3_UINT32 l; + + for (l = i; l < topo->cycleMark; l++) + { + topo->cycle[l - i] = topo->cycle[l]; // Move to zero base in the cycle list + } + + // Recalculate the limit + // + topo->cycleMark -= i; + + // Signal disaster + // + topo->hasCycle = ANTLR3_TRUE; + } + } + return; + } + + // So far, no cycles have been found and we have not visited this node yet, + // so this node needs to go into the cycle stack before we continue + // then we will take it out of the stack once we have descended all its + // dependencies. + // + topo->cycle[topo->cycleMark++] = node; + + // First flag that we have visited this node + // + topo->visited->add(topo->visited, node); + + // Now, if this node has edges, then we want to ensure we visit + // them all before we drop through and add this node into the sorted + // list. + // + edges = *((topo->edges) + node); + if (edges != NULL) + { + // We have some edges, so visit each of the edge nodes + // that have not already been visited. 
+ // + ANTLR3_UINT32 numBits; // How many bits are in the set + ANTLR3_UINT32 i; + ANTLR3_UINT32 range; + + numBits = edges->numBits(edges); + range = edges->size(edges); // Number of set bits + + // Stop if we exahust the bit list or have checked the + // number of edges that this node refers to (so we don't + // check bits at the end that cannot possibly be set). + // + for (i=0; i<= numBits && range > 0; i++) + { + if (edges->isMember(edges, i)) + { + range--; // About to check another one + + // Found an edge, make sure we visit and descend it + // + DFS(topo, i); + } + } + } + + // At this point we will have visited all the dependencies + // of this node and they will be ordered (even if there are cycles) + // So we just add the node into the sorted list at the + // current index position. + // + topo->sorted[topo->limit++] = node; + + // Remove this node from the cycle list if we have not detected a cycle + // + if (topo->hasCycle == ANTLR3_FALSE) + { + topo->cycleMark--; + } + + return; +} + +static pANTLR3_UINT32 +sortToArray (pANTLR3_TOPO topo) +{ + ANTLR3_UINT32 v; + ANTLR3_UINT32 oldLimit; + + // Guard against being called with no edges defined + // + if (topo->edges == NULL) + { + return 0; + } + // First we need a vector to populate with enough + // entries to accomodate the sorted list and another to accomodate + // the maximum cycle we could detect which is all nodes such as 0->1->2->3->0 + // + topo->sorted = ANTLR3_MALLOC(topo->limit * sizeof(ANTLR3_UINT32)); + topo->cycle = ANTLR3_MALLOC(topo->limit * sizeof(ANTLR3_UINT32)); + + // Next we need an empty bitset to show whether we have visited a node + // or not. This is the bit that gives us linear time of course as we are essentially + // dropping through the nodes in depth first order and when we get to a node that + // has no edges, we pop back up the stack adding the nodes we traversed in reverse + // order. 
+ // + topo->visited = antlr3BitsetNew(0); + + // Now traverse the nodes as if we were just going left to right, but + // then descend each node unless it has already been visited. + // + oldLimit = topo->limit; // Number of nodes to traverse linearly + topo->limit = 0; // Next entry in the sorted table + + for (v = 0; v < oldLimit; v++) + { + // If we did not already visit this node, then descend it until we + // get a node without edges or arrive at a node we have already visited. + // + if (topo->visited->isMember(topo->visited, v) == ANTLR3_FALSE) + { + // We have not visited this one so descend it + // + DFS(topo, v); + } + + // Break the loop if we detect a cycle as we have no need to go any + // further + // + if (topo->hasCycle == ANTLR3_TRUE) + { + break; + } + } + + // Reset the limit to the number we recorded as if we hit a + // cycle, then limit will have stopped at the node where we + // discovered the cycle, but in order to free the edge bitmaps + // we need to know how many we may have allocated and traverse them all. + // + topo->limit = oldLimit; + + // Having traversed all the nodes we were given, we + // are guaranteed to have ordered all the nodes or detected a + // cycle. + // + return topo->sorted; +} + +static void +sortVector (pANTLR3_TOPO topo, pANTLR3_VECTOR v) +{ + // To sort a vector, we first perform the + // sort to an array, then use the results to reorder the vector + // we are given. This is just a convenience routine that allows you to + // sort the children of a tree node into topological order before or + // during an AST walk. This can be useful for optimizations that require + // dag reorders and also when the input stream defines thigns that are + // interdependent and you want to walk the list of the generated trees + // for those things in topological order so you can ignore the interdependencies + // at that point. 
+ // + ANTLR3_UINT32 i; + + // Used as a lookup index to find the current location in the vector of + // the vector entry that was originally at position [0], [1], [2] etc + // + pANTLR3_UINT32 vIndex; + + // Sort into an array, then we can use the array that is + // stored in the topo + // + if (topo->sortToArray(topo) == 0) + { + return; // There were no edges + } + + if (topo->hasCycle == ANTLR3_TRUE) + { + return; // Do nothing if we detected a cycle + } + + // Ensure that the vector we are sorting is at least as big as the + // the input sequence we were adsked to sort. It does not matter if it is + // bigger as thaat probably just means that nodes numbered higher than the + // limit had no dependencies and so can be left alone. + // + if (topo->limit > v->count) + { + // We can only sort the entries that we have dude! The caller is + // responsible for ensuring the vector is the correct one and is the + // correct size etc. + // + topo->limit = v->count; + } + // We need to know the locations of each of the entries + // in the vector as we don't want to duplicate them in a new vector. We + // just use an indirection table to get the vector entry for a particular sequence + // acording to where we moved it last. Then we can just swap vector entries until + // we are done :-) + // + vIndex = ANTLR3_MALLOC(topo->limit * sizeof(ANTLR3_UINT32)); + + // Start index, each vector entry is located where you think it is + // + for (i = 0; i < topo->limit; i++) + { + vIndex[i] = i; + } + + // Now we traverse the sorted array and moved the entries of + // the vector around according to the sort order and the indirection + // table we just created. The index telsl us where in the vector the + // original element entry n is now located via vIndex[n]. + // + for (i=0; i < topo->limit; i++) + { + ANTLR3_UINT32 ind; + + // If the vector entry at i is already the one that it + // should be, then we skip moving it of course. 
+ // + if (vIndex[topo->sorted[i]] == i) + { + continue; + } + + // The vector entry at i, should be replaced with the + // vector entry indicated by topo->sorted[i]. The vector entry + // at topo->sorted[i] may have already been swapped out though, so we + // find where it is now and move it from there to i. + // + ind = vIndex[topo->sorted[i]]; + v->swap(v, i, ind); + + // Update our index. The element at i is now the one we wanted + // to be sorted here and the element we swapped out is now the + // element that was at i just before we swapped it. If you are lost now + // don't worry about it, we are just reindexing on the fly is all. + // + vIndex[topo->sorted[i]] = i; + vIndex[i] = ind; + } + + // Having traversed all the entries, we have sorted the vector in place. + // + ANTLR3_FREE(vIndex); + return; +} + +static void +freeTopo (pANTLR3_TOPO topo) +{ + ANTLR3_UINT32 i; + + // Free the result vector + // + if (topo->sorted != NULL) + { + ANTLR3_FREE(topo->sorted); + topo->sorted = NULL; + } + + // Free the visited map + // + if (topo->visited != NULL) + { + + topo->visited->free(topo->visited); + topo->visited = NULL; + } + + // Free any edgemaps + // + if (topo->edges != NULL) + { + pANTLR3_BITSET edgeList; + + + for (i=0; i<topo->limit; i++) + { + edgeList = *((topo->edges) + i); + if (edgeList != NULL) + { + edgeList->free(edgeList); + } + } + + ANTLR3_FREE(topo->edges); + } + topo->edges = NULL; + + // Free any cycle map + // + if (topo->cycle != NULL) + { + ANTLR3_FREE(topo->cycle); + } + + ANTLR3_FREE(topo); +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3commontoken.c b/impl/antlr/libantlr3c-3.4/src/antlr3commontoken.c new file mode 100644 index 0000000..2627431 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3commontoken.c @@ -0,0 +1,586 @@ +/** + * Contains the default implementation of the common token used within + * java. 
Custom tokens should create this structure and then append to it using the + * custom pointer to install their own structure and API. + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3.h> + +/* Token API + */ +static pANTLR3_STRING getText (pANTLR3_COMMON_TOKEN token); +static void setText (pANTLR3_COMMON_TOKEN token, pANTLR3_STRING text); +static void setText8 (pANTLR3_COMMON_TOKEN token, pANTLR3_UINT8 text); +static ANTLR3_UINT32 getType (pANTLR3_COMMON_TOKEN token); +static void setType (pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 type); +static ANTLR3_UINT32 getLine (pANTLR3_COMMON_TOKEN token); +static void setLine (pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 line); +static ANTLR3_INT32 getCharPositionInLine (pANTLR3_COMMON_TOKEN token); +static void setCharPositionInLine (pANTLR3_COMMON_TOKEN token, ANTLR3_INT32 pos); +static ANTLR3_UINT32 getChannel (pANTLR3_COMMON_TOKEN token); +static void setChannel (pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 channel); +static ANTLR3_MARKER getTokenIndex (pANTLR3_COMMON_TOKEN token); +static void setTokenIndex (pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER); +static ANTLR3_MARKER getStartIndex (pANTLR3_COMMON_TOKEN token); +static void setStartIndex (pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index); +static ANTLR3_MARKER getStopIndex (pANTLR3_COMMON_TOKEN token); +static void setStopIndex (pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index); +static pANTLR3_STRING toString (pANTLR3_COMMON_TOKEN token); + +/* Factory API + */ +static void factoryClose (pANTLR3_TOKEN_FACTORY factory); +static pANTLR3_COMMON_TOKEN newToken (void); +static void setInputStream (pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input); +static void factoryReset (pANTLR3_TOKEN_FACTORY factory); + +/* Internal management functions + */ +static void newPool (pANTLR3_TOKEN_FACTORY factory); +static pANTLR3_COMMON_TOKEN newPoolToken (pANTLR3_TOKEN_FACTORY factory); + + +ANTLR3_API pANTLR3_COMMON_TOKEN +antlr3CommonTokenNew(ANTLR3_UINT32 ttype) +{ + pANTLR3_COMMON_TOKEN token; + + // Create a raw token with the interface installed + // + token = newToken(); + + if (token != NULL) + { + token->setType(token, ttype); + } + 
+ // All good + // + return token; +} + +ANTLR3_API pANTLR3_TOKEN_FACTORY +antlr3TokenFactoryNew(pANTLR3_INPUT_STREAM input) +{ + pANTLR3_TOKEN_FACTORY factory; + + /* allocate memory + */ + factory = (pANTLR3_TOKEN_FACTORY) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_TOKEN_FACTORY)); + + if (factory == NULL) + { + return NULL; + } + + /* Install factory API + */ + factory->newToken = newPoolToken; + factory->close = factoryClose; + factory->setInputStream = setInputStream; + factory->reset = factoryReset; + + /* Allocate the initial pool + */ + factory->thisPool = -1; + factory->pools = NULL; + factory->maxPool = -1; + newPool(factory); + + /* Factory space is good, we now want to initialize our cheating token + * which one it is initialized is the model for all tokens we manufacture + */ + antlr3SetTokenAPI(&factory->unTruc); + + /* Set some initial variables for future copying + */ + factory->unTruc.factoryMade = ANTLR3_TRUE; + + // Input stream + // + setInputStream(factory, input); + + return factory; + +} + +static void +setInputStream (pANTLR3_TOKEN_FACTORY factory, pANTLR3_INPUT_STREAM input) +{ + factory->input = input; + factory->unTruc.input = input; + if (input != NULL) + { + factory->unTruc.strFactory = input->strFactory; + } + else + { + factory->unTruc.strFactory = NULL; + } +} + +static void +newPool(pANTLR3_TOKEN_FACTORY factory) +{ + /* Increment factory count + */ + factory->thisPool++; + + // If we were reusing this token factory then we may already have a pool + // allocated. If we exceeded the max avaible then we must allocate a new + // one. 
+ if (factory->thisPool > factory->maxPool) + { + /* Ensure we have enough pointers allocated + */ + factory->pools = (pANTLR3_COMMON_TOKEN *) + ANTLR3_REALLOC( (void *)factory->pools, /* Current pools pointer (starts at NULL) */ + (ANTLR3_UINT32)((factory->thisPool + 1) * sizeof(pANTLR3_COMMON_TOKEN *)) /* Memory for new pool pointers */ + ); + + /* Allocate a new pool for the factory + */ + factory->pools[factory->thisPool] = + (pANTLR3_COMMON_TOKEN) + ANTLR3_CALLOC(1, (size_t)(sizeof(ANTLR3_COMMON_TOKEN) * ANTLR3_FACTORY_POOL_SIZE)); + + // We now have a new pool and can track it as the maximum we have created so far + // + factory->maxPool = factory->thisPool; + } + + /* Reset the counters + */ + factory->nextToken = 0; + + /* Done + */ + return; +} + +static pANTLR3_COMMON_TOKEN +newPoolToken(pANTLR3_TOKEN_FACTORY factory) +{ + pANTLR3_COMMON_TOKEN token; + + /* See if we need a new token pool before allocating a new + * one + */ + if (factory->nextToken >= ANTLR3_FACTORY_POOL_SIZE) + { + /* We ran out of tokens in the current pool, so we need a new pool + */ + newPool(factory); + } + + /* Assuming everything went well (we are trying for performance here so doing minimal + * error checking. Then we can work out what the pointer is to the next token. + */ + token = factory->pools[factory->thisPool] + factory->nextToken; + factory->nextToken++; + + /* We have our token pointer now, so we can initialize it to the predefined model. + * We only need do this though if the token is not already initialized, we just check + * an api function pointer for this as they are allocated via calloc. + */ + if (token->setStartIndex == NULL) + { + antlr3SetTokenAPI(token); + + // It is factory made, and we need to copy the string factory pointer + // + token->factoryMade = ANTLR3_TRUE; + token->strFactory = factory->input == NULL ? 
NULL : factory->input->strFactory; + token->input = factory->input; + } + + /* And we are done + */ + return token; +} + +static void +factoryReset (pANTLR3_TOKEN_FACTORY factory) +{ + // Just start again with pool #0 when we are + // called. + // + factory->thisPool = -1; + newPool(factory); +} + +static void +factoryClose (pANTLR3_TOKEN_FACTORY factory) +{ + pANTLR3_COMMON_TOKEN pool; + ANTLR3_INT32 poolCount; + ANTLR3_UINT32 limit; + ANTLR3_UINT32 token; + pANTLR3_COMMON_TOKEN check; + + /* We iterate the token pools one at a time + */ + for (poolCount = 0; poolCount <= factory->thisPool; poolCount++) + { + /* Pointer to current pool + */ + pool = factory->pools[poolCount]; + + /* Work out how many tokens we need to check in this pool. + */ + limit = (poolCount == factory->thisPool ? factory->nextToken : ANTLR3_FACTORY_POOL_SIZE); + + /* Marginal condition, we might be at the start of a brand new pool + * where the nextToken is 0 and nothing has been allocated. + */ + if (limit > 0) + { + /* We have some tokens allocated from this pool + */ + for (token = 0; token < limit; token++) + { + /* Next one in the chain + */ + check = pool + token; + + /* If the programmer made this a custom token, then + * see if we need to call their free routine. + */ + if (check->custom != NULL && check->freeCustom != NULL) + { + check->freeCustom(check->custom); + check->custom = NULL; + } + } + } + + /* We can now free this pool allocation + */ + ANTLR3_FREE(factory->pools[poolCount]); + factory->pools[poolCount] = NULL; + } + + /* All the pools are deallocated we can free the pointers to the pools + * now. 
+ */ + ANTLR3_FREE(factory->pools); + + /* Finally, we can free the space for the factory itself + */ + ANTLR3_FREE(factory); +} + + +static pANTLR3_COMMON_TOKEN +newToken(void) +{ + pANTLR3_COMMON_TOKEN token; + + /* Allocate memory for this + */ + token = (pANTLR3_COMMON_TOKEN) ANTLR3_CALLOC(1, (size_t)(sizeof(ANTLR3_COMMON_TOKEN))); + + if (token == NULL) + { + return NULL; + } + + // Install the API + // + antlr3SetTokenAPI(token); + token->factoryMade = ANTLR3_FALSE; + + return token; +} + +ANTLR3_API void +antlr3SetTokenAPI(pANTLR3_COMMON_TOKEN token) +{ + token->getText = getText; + token->setText = setText; + token->setText8 = setText8; + token->getType = getType; + token->setType = setType; + token->getLine = getLine; + token->setLine = setLine; + token->setLine = setLine; + token->getCharPositionInLine = getCharPositionInLine; + token->setCharPositionInLine = setCharPositionInLine; + token->getChannel = getChannel; + token->setChannel = setChannel; + token->getTokenIndex = getTokenIndex; + token->setTokenIndex = setTokenIndex; + token->getStartIndex = getStartIndex; + token->setStartIndex = setStartIndex; + token->getStopIndex = getStopIndex; + token->setStopIndex = setStopIndex; + token->toString = toString; + + return; +} + +static pANTLR3_STRING getText (pANTLR3_COMMON_TOKEN token) +{ + switch (token->textState) + { + case ANTLR3_TEXT_STRING: + + // Someone already created a string for this token, so we just + // use it. + // + return token->tokText.text; + break; + + case ANTLR3_TEXT_CHARP: + + // We had a straight text pointer installed, now we + // must convert it to a string. 
Note we have to do this here + // or otherwise setText8() will just install the same char* + // + if (token->strFactory != NULL) + { + token->tokText.text = token->strFactory->newStr8(token->strFactory, (pANTLR3_UINT8)token->tokText.chars); + token->textState = ANTLR3_TEXT_STRING; + return token->tokText.text; + } + else + { + // We cannot do anything here + // + return NULL; + } + break; + + default: + + // EOF is a special case + // + if (token->type == ANTLR3_TOKEN_EOF) + { + token->tokText.text = token->strFactory->newStr8(token->strFactory, (pANTLR3_UINT8)"<EOF>"); + token->textState = ANTLR3_TEXT_STRING; + token->tokText.text->factory = token->strFactory; + return token->tokText.text; + } + + + // We had nothing installed in the token, create a new string + // from the input stream + // + + if (token->input != NULL) + { + + return token->input->substr( token->input, + token->getStartIndex(token), + token->getStopIndex(token) + ); + } + + // Nothing to return, there is no input stream + // + return NULL; + break; + } +} +static void setText8 (pANTLR3_COMMON_TOKEN token, pANTLR3_UINT8 text) +{ + // No text to set, so ignore + // + if (text == NULL) return; + + switch (token->textState) + { + case ANTLR3_TEXT_NONE: + case ANTLR3_TEXT_CHARP: // Caller must free before setting again, if it needs to be freed + + // Nothing in there yet, or just a char *, so just set the + // text as a pointer + // + token->textState = ANTLR3_TEXT_CHARP; + token->tokText.chars = (pANTLR3_UCHAR)text; + break; + + default: + + // It was already a pANTLR3_STRING, so just override it + // + token->tokText.text->set8(token->tokText.text, (const char *)text); + break; + } + + // We are done + // + return; +} + +/** \brief Install the supplied text string as teh text for the token. 
+ * The method assumes that the existing text (if any) was created by a factory + * and so does not attempt to release any memory it is using.Text not created + * by a string fctory (not advised) should be released prior to this call. + */ +static void setText (pANTLR3_COMMON_TOKEN token, pANTLR3_STRING text) +{ + // Merely replaces and existing pre-defined text with the supplied + // string + // + token->textState = ANTLR3_TEXT_STRING; + token->tokText.text = text; + + /* We are done + */ + return; +} + +static ANTLR3_UINT32 getType (pANTLR3_COMMON_TOKEN token) +{ + return token->type; +} + +static void setType (pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 type) +{ + token->type = type; +} + +static ANTLR3_UINT32 getLine (pANTLR3_COMMON_TOKEN token) +{ + return token->line; +} + +static void setLine (pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 line) +{ + token->line = line; +} + +static ANTLR3_INT32 getCharPositionInLine (pANTLR3_COMMON_TOKEN token) +{ + return token->charPosition; +} + +static void setCharPositionInLine (pANTLR3_COMMON_TOKEN token, ANTLR3_INT32 pos) +{ + token->charPosition = pos; +} + +static ANTLR3_UINT32 getChannel (pANTLR3_COMMON_TOKEN token) +{ + return token->channel; +} + +static void setChannel (pANTLR3_COMMON_TOKEN token, ANTLR3_UINT32 channel) +{ + token->channel = channel; +} + +static ANTLR3_MARKER getTokenIndex (pANTLR3_COMMON_TOKEN token) +{ + return token->index; +} + +static void setTokenIndex (pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER index) +{ + token->index = index; +} + +static ANTLR3_MARKER getStartIndex (pANTLR3_COMMON_TOKEN token) +{ + return token->start == -1 ? 
(ANTLR3_MARKER)(token->input->data) : token->start; +} + +static void setStartIndex (pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER start) +{ + token->start = start; +} + +static ANTLR3_MARKER getStopIndex (pANTLR3_COMMON_TOKEN token) +{ + return token->stop; +} + +static void setStopIndex (pANTLR3_COMMON_TOKEN token, ANTLR3_MARKER stop) +{ + token->stop = stop; +} + +static pANTLR3_STRING toString (pANTLR3_COMMON_TOKEN token) +{ + pANTLR3_STRING text; + pANTLR3_STRING outtext; + + text = token->getText(token); + + if (text == NULL) + { + return NULL; + } + + if (text->factory == NULL) + { + return text; // This usally means it is the EOF token + } + + /* A new empty string to assemble all the stuff in + */ + outtext = text->factory->newRaw(text->factory); + + /* Now we use our handy dandy string utility to assemble the + * the reporting string + * return "[@"+getTokenIndex()+","+start+":"+stop+"='"+txt+"',<"+type+">"+channelStr+","+line+":"+getCharPositionInLine()+"]"; + */ + outtext->append8(outtext, "[Index: "); + outtext->addi (outtext, (ANTLR3_INT32)token->getTokenIndex(token)); + outtext->append8(outtext, " (Start: "); + outtext->addi (outtext, (ANTLR3_INT32)token->getStartIndex(token)); + outtext->append8(outtext, "-Stop: "); + outtext->addi (outtext, (ANTLR3_INT32)token->getStopIndex(token)); + outtext->append8(outtext, ") ='"); + outtext->appendS(outtext, text); + outtext->append8(outtext, "', type<"); + outtext->addi (outtext, token->type); + outtext->append8(outtext, "> "); + + if (token->getChannel(token) > ANTLR3_TOKEN_DEFAULT_CHANNEL) + { + outtext->append8(outtext, "(channel = "); + outtext->addi (outtext, (ANTLR3_INT32)token->getChannel(token)); + outtext->append8(outtext, ") "); + } + + outtext->append8(outtext, "Line: "); + outtext->addi (outtext, (ANTLR3_INT32)token->getLine(token)); + outtext->append8(outtext, " LinePos:"); + outtext->addi (outtext, token->getCharPositionInLine(token)); + outtext->addc (outtext, ']'); + + return outtext; +} + diff 
--git a/impl/antlr/libantlr3c-3.4/src/antlr3commontree.c b/impl/antlr/libantlr3c-3.4/src/antlr3commontree.c new file mode 100644 index 0000000..65de38f --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3commontree.c @@ -0,0 +1,547 @@ +// \file +// +// Implementation of ANTLR3 CommonTree, which you can use as a +// starting point for your own tree. Though it is often easier just to tag things on +// to the user pointer in the tree unless you are building a different type +// of structure. +// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3commontree.h> + + +static pANTLR3_COMMON_TOKEN getToken (pANTLR3_BASE_TREE tree); +static pANTLR3_BASE_TREE dupNode (pANTLR3_BASE_TREE tree); +static ANTLR3_BOOLEAN isNilNode (pANTLR3_BASE_TREE tree); +static ANTLR3_UINT32 getType (pANTLR3_BASE_TREE tree); +static pANTLR3_STRING getText (pANTLR3_BASE_TREE tree); +static ANTLR3_UINT32 getLine (pANTLR3_BASE_TREE tree); +static ANTLR3_UINT32 getCharPositionInLine (pANTLR3_BASE_TREE tree); +static pANTLR3_STRING toString (pANTLR3_BASE_TREE tree); +static pANTLR3_BASE_TREE getParent (pANTLR3_BASE_TREE tree); +static void setParent (pANTLR3_BASE_TREE tree, pANTLR3_BASE_TREE parent); +static void setChildIndex (pANTLR3_BASE_TREE tree, ANTLR3_INT32 i); +static ANTLR3_INT32 getChildIndex (pANTLR3_BASE_TREE tree); +static void createChildrenList (pANTLR3_BASE_TREE tree); +static void reuse (pANTLR3_BASE_TREE tree); + +// Factory functions for the Arboretum +// +static void newPool (pANTLR3_ARBORETUM factory); +static pANTLR3_BASE_TREE newPoolTree (pANTLR3_ARBORETUM factory); +static pANTLR3_BASE_TREE newFromTree (pANTLR3_ARBORETUM factory, pANTLR3_COMMON_TREE tree); +static pANTLR3_BASE_TREE newFromToken (pANTLR3_ARBORETUM factory, pANTLR3_COMMON_TOKEN token); +static void factoryClose (pANTLR3_ARBORETUM factory); + +ANTLR3_API pANTLR3_ARBORETUM +antlr3ArboretumNew(pANTLR3_STRING_FACTORY strFactory) +{ + pANTLR3_ARBORETUM factory; + + // Allocate memory + // + factory = (pANTLR3_ARBORETUM) 
ANTLR3_MALLOC((size_t)sizeof(ANTLR3_ARBORETUM)); + if (factory == NULL) + { + return NULL; + } + + // Install a vector factory to create, track and free() any child + // node lists. + // + factory->vFactory = antlr3VectorFactoryNew(0); + if (factory->vFactory == NULL) + { + free(factory); + return NULL; + } + + // We also keep a reclaim stack, so that any Nil nodes that are + // orphaned are not just left in the pool but are reused, other wise + // we create 6 times as many nilNodes as ordinary nodes and use loads of + // memory. Perhaps at some point, the analysis phase will generate better + // code and we won't need to do this here. + // + factory->nilStack = antlr3StackNew(0); + + // Install factory API + // + factory->newTree = newPoolTree; + factory->newFromTree = newFromTree; + factory->newFromToken = newFromToken; + factory->close = factoryClose; + + // Allocate the initial pool + // + factory->thisPool = -1; + factory->pools = NULL; + newPool(factory); + + // Factory space is good, we now want to initialize our cheating token + // which one it is initialized is the model for all tokens we manufacture + // + antlr3SetCTAPI(&factory->unTruc); + + // Set some initial variables for future copying, including a string factory + // that we can use later for converting trees to strings. 
+ // + factory->unTruc.factory = factory; + factory->unTruc.baseTree.strFactory = strFactory; + + return factory; + +} + +static void +newPool(pANTLR3_ARBORETUM factory) +{ + // Increment factory count + // + factory->thisPool++; + + // Ensure we have enough pointers allocated + // + factory->pools = (pANTLR3_COMMON_TREE *) + ANTLR3_REALLOC( (void *)factory->pools, // Current pools pointer (starts at NULL) + (ANTLR3_UINT32)((factory->thisPool + 1) * sizeof(pANTLR3_COMMON_TREE *)) // Memory for new pool pointers + ); + + // Allocate a new pool for the factory + // + factory->pools[factory->thisPool] = + (pANTLR3_COMMON_TREE) + ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_COMMON_TREE) * ANTLR3_FACTORY_POOL_SIZE)); + + + // Reset the counters + // + factory->nextTree = 0; + + // Done + // + return; +} + +static pANTLR3_BASE_TREE +newPoolTree (pANTLR3_ARBORETUM factory) +{ + pANTLR3_COMMON_TREE tree; + + // If we have anything on the re claim stack, reuse that sucker first + // + tree = factory->nilStack->peek(factory->nilStack); + + if (tree != NULL) + { + // Cool we got something we could reuse, it will have been cleaned up by + // whatever put it back on the stack (for instance if it had a child vector, + // that will have been cleared to hold zero entries and that vector will get reused too. + // It is the basetree pointer that is placed on the stack of course + // + factory->nilStack->pop(factory->nilStack); + return (pANTLR3_BASE_TREE)tree; + + } + // See if we need a new tree pool before allocating a new tree + // + if (factory->nextTree >= ANTLR3_FACTORY_POOL_SIZE) + { + // We ran out of tokens in the current pool, so we need a new pool + // + newPool(factory); + } + + // Assuming everything went well - we are trying for performance here so doing minimal + // error checking - then we can work out what the pointer is to the next commontree. 
+ // + tree = factory->pools[factory->thisPool] + factory->nextTree; + factory->nextTree++; + + // We have our token pointer now, so we can initialize it to the predefined model. + // + antlr3SetCTAPI(tree); + + // Set some initial variables for future copying, including a string factory + // that we can use later for converting trees to strings. + // + tree->factory = factory; + tree->baseTree.strFactory = factory->unTruc.baseTree.strFactory; + + // The super points to the common tree so we must override the one used by + // by the pre-built tree as otherwise we will always poitn to the same initial + // common tree and we might spend 3 hours trying to debug why - this would never + // happen to me of course! :-( + // + tree->baseTree.super = tree; + + + // And we are done + // + return &(tree->baseTree); +} + + +static pANTLR3_BASE_TREE +newFromTree(pANTLR3_ARBORETUM factory, pANTLR3_COMMON_TREE tree) +{ + pANTLR3_BASE_TREE newTree; + + newTree = factory->newTree(factory); + + if (newTree == NULL) + { + return NULL; + } + + // Pick up the payload we had in the supplied tree + // + ((pANTLR3_COMMON_TREE)(newTree->super))->token = tree->token; + newTree->u = tree->baseTree.u; // Copy any user pointer + + return newTree; +} + +static pANTLR3_BASE_TREE +newFromToken(pANTLR3_ARBORETUM factory, pANTLR3_COMMON_TOKEN token) +{ + pANTLR3_BASE_TREE newTree; + + newTree = factory->newTree(factory); + + if (newTree == NULL) + { + return NULL; + } + + // Pick up the payload we had in the supplied tree + // + ((pANTLR3_COMMON_TREE)(newTree->super))->token = token; + + return newTree; +} + +static void +factoryClose (pANTLR3_ARBORETUM factory) +{ + ANTLR3_INT32 poolCount; + + // First close the vector factory that supplied all the child pointer + // vectors. 
+ // + factory->vFactory->close(factory->vFactory); + + if (factory->nilStack != NULL) + { + factory->nilStack->free(factory->nilStack); + } + + // We now JUST free the pools because the C runtime CommonToken based tree + // cannot contain anything that was not made by this factory. + // + for (poolCount = 0; poolCount <= factory->thisPool; poolCount++) + { + // We can now free this pool allocation + // + ANTLR3_FREE(factory->pools[poolCount]); + factory->pools[poolCount] = NULL; + } + + // All the pools are deallocated we can free the pointers to the pools + // now. + // + ANTLR3_FREE(factory->pools); + + // Finally, we can free the space for the factory itself + // + ANTLR3_FREE(factory); +} + + +ANTLR3_API void +antlr3SetCTAPI(pANTLR3_COMMON_TREE tree) +{ + // Init base tree + // + antlr3BaseTreeNew(&(tree->baseTree)); + + // We need a pointer to ourselves for + // the payload and few functions that we + // provide. + // + tree->baseTree.super = tree; + + // Common tree overrides + + tree->baseTree.isNilNode = isNilNode; + tree->baseTree.toString = toString; + tree->baseTree.dupNode = (void *(*)(pANTLR3_BASE_TREE))(dupNode); + tree->baseTree.getLine = getLine; + tree->baseTree.getCharPositionInLine = getCharPositionInLine; + tree->baseTree.toString = toString; + tree->baseTree.getType = getType; + tree->baseTree.getText = getText; + tree->baseTree.getToken = getToken; + tree->baseTree.getParent = getParent; + tree->baseTree.setParent = setParent; + tree->baseTree.setChildIndex = setChildIndex; + tree->baseTree.getChildIndex = getChildIndex; + tree->baseTree.createChildrenList = createChildrenList; + tree->baseTree.reuse = reuse; + tree->baseTree.free = NULL; // Factory trees have no free function + tree->baseTree.u = NULL; // Initialize user pointer + + tree->baseTree.children = NULL; + + tree->token = NULL; // No token as yet + tree->startIndex = 0; + tree->stopIndex = 0; + tree->parent = NULL; // No parent yet + tree->childIndex = -1; + + return; +} + +// 
-------------------------------------- +// Non factory node constructors. +// + +ANTLR3_API pANTLR3_COMMON_TREE +antlr3CommonTreeNew() +{ + pANTLR3_COMMON_TREE tree; + tree = ANTLR3_CALLOC(1, sizeof(ANTLR3_COMMON_TREE)); + + if (tree == NULL) + { + return NULL; + } + + antlr3SetCTAPI(tree); + + return tree; +} + +ANTLR3_API pANTLR3_COMMON_TREE +antlr3CommonTreeNewFromToken(pANTLR3_COMMON_TOKEN token) +{ + pANTLR3_COMMON_TREE newTree; + + newTree = antlr3CommonTreeNew(); + + if (newTree == NULL) + { + return NULL; + } + + //Pick up the payload we had in the supplied tree + // + newTree->token = token; + return newTree; +} + +/// Create a new vector for holding child nodes using the inbuilt +/// vector factory. +/// +static void +createChildrenList (pANTLR3_BASE_TREE tree) +{ + tree->children = ((pANTLR3_COMMON_TREE)(tree->super))->factory->vFactory->newVector(((pANTLR3_COMMON_TREE)(tree->super))->factory->vFactory); +} + + +static pANTLR3_COMMON_TOKEN +getToken (pANTLR3_BASE_TREE tree) +{ + // The token is the payload of the common tree or other implementor + // so it is stored within ourselves, which is the super pointer.Note + // that whatever the actual token is, it is passed around by its pointer + // to the common token implementation, which it may of course surround + // with its own super structure. + // + return ((pANTLR3_COMMON_TREE)(tree->super))->token; +} + +static pANTLR3_BASE_TREE +dupNode (pANTLR3_BASE_TREE tree) +{ + // The node we are duplicating is in fact the common tree (that's why we are here) + // so we use the super pointer to duplicate. 
+ // + pANTLR3_COMMON_TREE theOld; + + theOld = (pANTLR3_COMMON_TREE)(tree->super); + + // The pointer we return is the base implementation of course + // + return theOld->factory->newFromTree(theOld->factory, theOld); +} + +static ANTLR3_BOOLEAN +isNilNode (pANTLR3_BASE_TREE tree) +{ + // This is a Nil tree if it has no payload (Token in our case) + // + if (((pANTLR3_COMMON_TREE)(tree->super))->token == NULL) + { + return ANTLR3_TRUE; + } + else + { + return ANTLR3_FALSE; + } +} + +static ANTLR3_UINT32 +getType (pANTLR3_BASE_TREE tree) +{ + pANTLR3_COMMON_TREE theTree; + + theTree = (pANTLR3_COMMON_TREE)(tree->super); + + if (theTree->token == NULL) + { + return 0; + } + else + { + return theTree->token->getType(theTree->token); + } +} + +static pANTLR3_STRING +getText (pANTLR3_BASE_TREE tree) +{ + return tree->toString(tree); +} + +static ANTLR3_UINT32 getLine (pANTLR3_BASE_TREE tree) +{ + pANTLR3_COMMON_TREE cTree; + pANTLR3_COMMON_TOKEN token; + + cTree = (pANTLR3_COMMON_TREE)(tree->super); + + token = cTree->token; + + if (token == NULL || token->getLine(token) == 0) + { + if (tree->getChildCount(tree) > 0) + { + pANTLR3_BASE_TREE child; + + child = (pANTLR3_BASE_TREE)tree->getChild(tree, 0); + return child->getLine(child); + } + return 0; + } + return token->getLine(token); +} + +static ANTLR3_UINT32 getCharPositionInLine (pANTLR3_BASE_TREE tree) +{ + pANTLR3_COMMON_TOKEN token; + + token = ((pANTLR3_COMMON_TREE)(tree->super))->token; + + if (token == NULL || token->getCharPositionInLine(token) == -1) + { + if (tree->getChildCount(tree) > 0) + { + pANTLR3_BASE_TREE child; + + child = (pANTLR3_BASE_TREE)tree->getChild(tree, 0); + + return child->getCharPositionInLine(child); + } + return 0; + } + return token->getCharPositionInLine(token); +} + +static pANTLR3_STRING toString (pANTLR3_BASE_TREE tree) +{ + if (tree->isNilNode(tree) == ANTLR3_TRUE) + { + pANTLR3_STRING nilNode; + + nilNode = tree->strFactory->newPtr(tree->strFactory, (pANTLR3_UINT8)"nil", 3); + 
+ return nilNode; + } + + return ((pANTLR3_COMMON_TREE)(tree->super))->token->getText(((pANTLR3_COMMON_TREE)(tree->super))->token); +} + +static pANTLR3_BASE_TREE +getParent (pANTLR3_BASE_TREE tree) +{ + return & (((pANTLR3_COMMON_TREE)(tree->super))->parent->baseTree); +} + +static void +setParent (pANTLR3_BASE_TREE tree, pANTLR3_BASE_TREE parent) +{ + ((pANTLR3_COMMON_TREE)(tree->super))->parent = parent == NULL ? NULL : ((pANTLR3_COMMON_TREE)(parent->super))->parent; +} + +static void +setChildIndex (pANTLR3_BASE_TREE tree, ANTLR3_INT32 i) +{ + ((pANTLR3_COMMON_TREE)(tree->super))->childIndex = i; +} +static ANTLR3_INT32 +getChildIndex (pANTLR3_BASE_TREE tree ) +{ + return ((pANTLR3_COMMON_TREE)(tree->super))->childIndex; +} + +/** Clean up any child vector that the tree might have, so it can be reused, + * then add it into the reuse stack. + */ +static void +reuse (pANTLR3_BASE_TREE tree) +{ + pANTLR3_COMMON_TREE cTree; + + cTree = (pANTLR3_COMMON_TREE)(tree->super); + + if (cTree->factory != NULL) + { + + if (cTree->baseTree.children != NULL) + { + + cTree->baseTree.children->clear(cTree->baseTree.children); + } + cTree->factory->nilStack->push(cTree->factory->nilStack, tree, NULL); + + } +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3commontreeadaptor.c b/impl/antlr/libantlr3c-3.4/src/antlr3commontreeadaptor.c new file mode 100644 index 0000000..abce6f0 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3commontreeadaptor.c @@ -0,0 +1,496 @@ +/** \file + * This is the standard tree adaptor used by the C runtime unless the grammar + * source file says to use anything different. It embeds a BASE_TREE to which + * it adds its own implementation of anything that the base tree is not + * good for, plus a number of methods that any other adaptor type + * needs to implement too. 
+ * \ingroup pANTLR3_COMMON_TREE_ADAPTOR + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3commontreeadaptor.h> + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +/* BASE_TREE_ADAPTOR overrides... 
*/ +static pANTLR3_BASE_TREE dupNode (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE treeNode); +static pANTLR3_BASE_TREE create (pANTLR3_BASE_TREE_ADAPTOR adpator, pANTLR3_COMMON_TOKEN payload); +static pANTLR3_BASE_TREE dbgCreate (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_COMMON_TOKEN payload); +static pANTLR3_COMMON_TOKEN createToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_UINT8 text); +static pANTLR3_COMMON_TOKEN createTokenFromToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_COMMON_TOKEN fromToken); +static pANTLR3_COMMON_TOKEN getToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static pANTLR3_STRING getText (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static ANTLR3_UINT32 getType (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static pANTLR3_BASE_TREE getChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 i); +static ANTLR3_UINT32 getChildCount (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static void replaceChildren (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE parent, ANTLR3_INT32 startChildIndex, ANTLR3_INT32 stopChildIndex, pANTLR3_BASE_TREE t); +static void setDebugEventListener (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_DEBUG_EVENT_LISTENER debugger); +static void setChildIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_INT32 i); +static ANTLR3_INT32 getChildIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static void setParent (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE child, pANTLR3_BASE_TREE parent); +static pANTLR3_BASE_TREE getParent (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE child); +static void setChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 i, pANTLR3_BASE_TREE child); +static void deleteChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 i); +static pANTLR3_BASE_TREE errorNode (pANTLR3_BASE_TREE_ADAPTOR 
adaptor, pANTLR3_TOKEN_STREAM ctnstream, pANTLR3_COMMON_TOKEN startToken, pANTLR3_COMMON_TOKEN stopToken, pANTLR3_EXCEPTION e); +/* Methods specific to each tree adaptor + */ +static void setTokenBoundaries (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN startToken, pANTLR3_COMMON_TOKEN stopToken); +static void dbgSetTokenBoundaries (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN startToken, pANTLR3_COMMON_TOKEN stopToken); +static ANTLR3_MARKER getTokenStartIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); +static ANTLR3_MARKER getTokenStopIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t); + +static void ctaFree (pANTLR3_BASE_TREE_ADAPTOR adaptor); + +/** Create a new tree adaptor. Note that despite the fact that this is + * creating a new COMMON_TREE adaptor, we return the address of the + * BASE_TREE interface, as should any other adaptor that wishes to be + * used as the tree element of a tree parse/build. It needs to be given the + * address of a valid string factory as we do not know what the originating + * input stream encoding type was. This way we can rely on just using + * the original input stream's string factory or one of the correct type + * which the user supplies us. + */ +ANTLR3_API pANTLR3_BASE_TREE_ADAPTOR +ANTLR3_TREE_ADAPTORNew(pANTLR3_STRING_FACTORY strFactory) +{ + pANTLR3_COMMON_TREE_ADAPTOR cta; + + // First job is to create the memory we need for the tree adaptor interface. + // + cta = (pANTLR3_COMMON_TREE_ADAPTOR) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_COMMON_TREE_ADAPTOR))); + + if (cta == NULL) + { + return NULL; + } + + // Memory is initialized, so initialize the base tree adaptor + // + antlr3BaseTreeAdaptorInit(&(cta->baseAdaptor), NULL); + + // Install our interface overrides. 
Strangeness is to allow generated code to treat them + // as returning void * + // + cta->baseAdaptor.dupNode = (void * (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + dupNode; + cta->baseAdaptor.create = (void * (*) (pANTLR3_BASE_TREE_ADAPTOR, pANTLR3_COMMON_TOKEN)) + create; + cta->baseAdaptor.createToken = + createToken; + cta->baseAdaptor.createTokenFromToken = + createTokenFromToken; + cta->baseAdaptor.setTokenBoundaries = (void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, pANTLR3_COMMON_TOKEN, pANTLR3_COMMON_TOKEN)) + setTokenBoundaries; + cta->baseAdaptor.getTokenStartIndex = (ANTLR3_MARKER (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getTokenStartIndex; + cta->baseAdaptor.getTokenStopIndex = (ANTLR3_MARKER (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getTokenStopIndex; + cta->baseAdaptor.getText = (pANTLR3_STRING (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getText; + cta->baseAdaptor.getType = (ANTLR3_UINT32 (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getType; + cta->baseAdaptor.getChild = (void * (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, ANTLR3_UINT32)) + getChild; + cta->baseAdaptor.setChild = (void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, ANTLR3_UINT32, void *)) + setChild; + cta->baseAdaptor.setParent = (void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, void *)) + setParent; + cta->baseAdaptor.getParent = (void * (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getParent; + cta->baseAdaptor.setChildIndex = (void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, ANTLR3_UINT32)) + setChildIndex; + cta->baseAdaptor.deleteChild = (void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, ANTLR3_UINT32)) + deleteChild; + cta->baseAdaptor.getChildCount = (ANTLR3_UINT32 (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getChildCount; + cta->baseAdaptor.getChildIndex = (ANTLR3_INT32 (*) (pANTLR3_BASE_TREE_ADAPTOR, void *)) + getChildIndex; + cta->baseAdaptor.free = (void (*) (pANTLR3_BASE_TREE_ADAPTOR)) + ctaFree; + cta->baseAdaptor.setDebugEventListener = + setDebugEventListener; + cta->baseAdaptor.replaceChildren = 
(void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, ANTLR3_INT32, ANTLR3_INT32, void *)) + replaceChildren; + cta->baseAdaptor.errorNode = (void * (*) (pANTLR3_BASE_TREE_ADAPTOR, pANTLR3_TOKEN_STREAM, pANTLR3_COMMON_TOKEN, pANTLR3_COMMON_TOKEN, pANTLR3_EXCEPTION)) + errorNode; + + // Install the super class pointer + // + cta->baseAdaptor.super = cta; + + // Install a tree factory for creating new tree nodes + // + cta->arboretum = antlr3ArboretumNew(strFactory); + + // Install a token factory for imaginary tokens, these imaginary + // tokens do not require access to the input stream so we can + // dummy the creation of it, but they will need a string factory. + // + cta->baseAdaptor.tokenFactory = antlr3TokenFactoryNew(NULL); + cta->baseAdaptor.tokenFactory->unTruc.strFactory = strFactory; + + // Allow the base tree adaptor to share the tree factory's string factory. + // + cta->baseAdaptor.strFactory = strFactory; + + // Return the address of the base adaptor interface. + // + return &(cta->baseAdaptor); +} + +/// Debugging version of the tree adaptor (not normally called as generated code +/// calls setDebugEventListener instead which changes a normal token stream to +/// a debugging stream and means that a user's instantiation code does not need +/// to be changed just to debug with AW. 
+/// +ANTLR3_API pANTLR3_BASE_TREE_ADAPTOR +ANTLR3_TREE_ADAPTORDebugNew(pANTLR3_STRING_FACTORY strFactory, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + pANTLR3_BASE_TREE_ADAPTOR ta; + + // Create a normal one first + // + ta = ANTLR3_TREE_ADAPTORNew(strFactory); + + if (ta != NULL) + { + // Reinitialize as a debug version + // + antlr3BaseTreeAdaptorInit(ta, debugger); + ta->create = (void * (*) (pANTLR3_BASE_TREE_ADAPTOR, pANTLR3_COMMON_TOKEN)) + dbgCreate; + ta->setTokenBoundaries = (void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, pANTLR3_COMMON_TOKEN, pANTLR3_COMMON_TOKEN)) + dbgSetTokenBoundaries; + } + + return ta; +} + +/// Causes an existing common tree adaptor to become a debug version +/// +static void +setDebugEventListener (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + // Reinitialize as a debug version + // + antlr3BaseTreeAdaptorInit(adaptor, debugger); + + adaptor->create = (void * (*) (pANTLR3_BASE_TREE_ADAPTOR, pANTLR3_COMMON_TOKEN)) + dbgCreate; + adaptor->setTokenBoundaries = (void (*) (pANTLR3_BASE_TREE_ADAPTOR, void *, pANTLR3_COMMON_TOKEN, pANTLR3_COMMON_TOKEN)) + dbgSetTokenBoundaries; + +} + +static void +ctaFree(pANTLR3_BASE_TREE_ADAPTOR adaptor) +{ + pANTLR3_COMMON_TREE_ADAPTOR cta; + + cta = (pANTLR3_COMMON_TREE_ADAPTOR)(adaptor->super); + + /* Free the tree factory we created + */ + cta->arboretum->close(((pANTLR3_COMMON_TREE_ADAPTOR)(adaptor->super))->arboretum); + + /* Free the token factory we created + */ + adaptor->tokenFactory->close(adaptor->tokenFactory); + + /* Free the super pointer, as it is this that was allocated + * and is the common tree structure. 
+ */ + ANTLR3_FREE(adaptor->super); +} + +/* BASE_TREE_ADAPTOR overrides */ + +static pANTLR3_BASE_TREE +errorNode (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_TOKEN_STREAM ctnstream, pANTLR3_COMMON_TOKEN startToken, pANTLR3_COMMON_TOKEN stopToken, pANTLR3_EXCEPTION e) +{ + // Use the supplied common tree node stream to get another tree from the factory + // TODO: Look at creating the erronode as in Java, but this is complicated by the + // need to track and free the memory allocated to it, so for now, we just + // want something in the tree that isn't a NULL pointer. + // + return adaptor->createTypeText(adaptor, ANTLR3_TOKEN_INVALID, (pANTLR3_UINT8)"Tree Error Node"); + +} + +/** Duplicate the supplied node. + */ +static pANTLR3_BASE_TREE +dupNode (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE treeNode) +{ + return treeNode == NULL ? NULL : treeNode->dupNode(treeNode); +} + +static pANTLR3_BASE_TREE +create (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_COMMON_TOKEN payload) +{ + pANTLR3_BASE_TREE ct; + + /* Create a new common tree as this is what this adaptor deals with + */ + ct = ((pANTLR3_COMMON_TREE_ADAPTOR)(adaptor->super))->arboretum->newFromToken(((pANTLR3_COMMON_TREE_ADAPTOR)(adaptor->super))->arboretum, payload); + + /* But all adaptors return the pointer to the base interface. + */ + return ct; +} +static pANTLR3_BASE_TREE +dbgCreate (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_COMMON_TOKEN payload) +{ + pANTLR3_BASE_TREE ct; + + ct = create(adaptor, payload); + adaptor->debugger->createNode(adaptor->debugger, ct); + + return ct; +} + +/** Tell me how to create a token for use with imaginary token nodes. + * For example, there is probably no input symbol associated with imaginary + * token DECL, but you need to create it as a payload or whatever for + * the DECL node as in ^(DECL type ID). + * + * If you care what the token payload objects' type is, you should + * override this method and any other createToken variant. 
+ */ +static pANTLR3_COMMON_TOKEN +createToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, ANTLR3_UINT32 tokenType, pANTLR3_UINT8 text) +{ + pANTLR3_COMMON_TOKEN newToken; + + newToken = adaptor->tokenFactory->newToken(adaptor->tokenFactory); + + if (newToken != NULL) + { + newToken->textState = ANTLR3_TEXT_CHARP; + newToken->tokText.chars = (pANTLR3_UCHAR)text; + newToken->setType(newToken, tokenType); + newToken->input = adaptor->tokenFactory->input; + newToken->strFactory = adaptor->strFactory; + } + return newToken; +} + +/** Tell me how to create a token for use with imaginary token nodes. + * For example, there is probably no input symbol associated with imaginary + * token DECL, but you need to create it as a payload or whatever for + * the DECL node as in ^(DECL type ID). + * + * This is a variant of createToken where the new token is derived from + * an actual real input token. Typically this is for converting '{' + * tokens to BLOCK etc... You'll see + * + * r : lc='{' ID+ '}' -> ^(BLOCK[$lc] ID+) ; + * + * If you care what the token payload objects' type is, you should + * override this method and any other createToken variant. + * + * NB: this being C it is not so easy to extend the types of creaeteToken. + * We will have to see if anyone needs to do this and add any variants to + * this interface. + */ +static pANTLR3_COMMON_TOKEN +createTokenFromToken (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_COMMON_TOKEN fromToken) +{ + pANTLR3_COMMON_TOKEN newToken; + + newToken = adaptor->tokenFactory->newToken(adaptor->tokenFactory); + + if (newToken != NULL) + { + // Create the text using our own string factory to avoid complicating + // commontoken. 
+ // + pANTLR3_STRING text; + + newToken->toString = fromToken->toString; + + if (fromToken->textState == ANTLR3_TEXT_CHARP) + { + newToken->textState = ANTLR3_TEXT_CHARP; + newToken->tokText.chars = fromToken->tokText.chars; + } + else + { + text = fromToken->getText(fromToken); + newToken->textState = ANTLR3_TEXT_STRING; + newToken->tokText.text = adaptor->strFactory->newPtr(adaptor->strFactory, text->chars, text->len); + } + + newToken->setLine (newToken, fromToken->getLine(fromToken)); + newToken->setTokenIndex (newToken, fromToken->getTokenIndex(fromToken)); + newToken->setCharPositionInLine (newToken, fromToken->getCharPositionInLine(fromToken)); + newToken->setChannel (newToken, fromToken->getChannel(fromToken)); + newToken->setType (newToken, fromToken->getType(fromToken)); + } + + return newToken; +} + +/* Specific methods for a TreeAdaptor */ + +/** Track start/stop token for subtree root created for a rule. + * Only works with CommonTree nodes. For rules that match nothing, + * seems like this will yield start=i and stop=i-1 in a nil node. + * Might be useful info so I'll not force to be i..i. 
+ */ +static void +setTokenBoundaries (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN startToken, pANTLR3_COMMON_TOKEN stopToken) +{ + ANTLR3_MARKER start; + ANTLR3_MARKER stop; + + pANTLR3_COMMON_TREE ct; + + if (t == NULL) + { + return; + } + + if ( startToken != NULL) + { + start = startToken->getTokenIndex(startToken); + } + else + { + start = 0; + } + + if ( stopToken != NULL) + { + stop = stopToken->getTokenIndex(stopToken); + } + else + { + stop = 0; + } + + ct = (pANTLR3_COMMON_TREE)(t->super); + + ct->startIndex = start; + ct->stopIndex = stop; + +} +static void +dbgSetTokenBoundaries (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, pANTLR3_COMMON_TOKEN startToken, pANTLR3_COMMON_TOKEN stopToken) +{ + setTokenBoundaries(adaptor, t, startToken, stopToken); + + if (t != NULL && startToken != NULL && stopToken != NULL) + { + adaptor->debugger->setTokenBoundaries(adaptor->debugger, t, startToken->getTokenIndex(startToken), stopToken->getTokenIndex(stopToken)); + } +} + +static ANTLR3_MARKER +getTokenStartIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return ((pANTLR3_COMMON_TREE)(t->super))->startIndex; +} + +static ANTLR3_MARKER +getTokenStopIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return ((pANTLR3_COMMON_TREE)(t->super))->stopIndex; +} + +static pANTLR3_STRING +getText (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return t->getText(t); +} + +static ANTLR3_UINT32 +getType (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return t->getType(t); +} + +static void +replaceChildren +(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE parent, ANTLR3_INT32 startChildIndex, ANTLR3_INT32 stopChildIndex, pANTLR3_BASE_TREE t) +{ + if (parent != NULL) + { + parent->replaceChildren(parent, startChildIndex, stopChildIndex, t); + } +} + +static pANTLR3_BASE_TREE +getChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 i) +{ + return 
t->getChild(t, i); +} +static void +setChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 i, pANTLR3_BASE_TREE child) +{ + t->setChild(t, i, child); +} + +static void +deleteChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_UINT32 i) +{ + t->deleteChild(t, i); +} + +static ANTLR3_UINT32 +getChildCount (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return t->getChildCount(t); +} + +static void +setChildIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t, ANTLR3_INT32 i) +{ + t->setChildIndex(t, i); +} + +static ANTLR3_INT32 +getChildIndex (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t) +{ + return t->getChildIndex(t); +} +static void +setParent (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE child, pANTLR3_BASE_TREE parent) +{ + child->setParent(child, parent); +} +static pANTLR3_BASE_TREE +getParent (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE child) +{ + return child->getParent(child); +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3commontreenodestream.c b/impl/antlr/libantlr3c-3.4/src/antlr3commontreenodestream.c new file mode 100644 index 0000000..a759d34 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3commontreenodestream.c @@ -0,0 +1,968 @@ +/// \file +/// Defines the implementation of the common node stream the default +/// tree node stream used by ANTLR. +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3commontreenodestream.h> + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +// COMMON TREE STREAM API +// +static void addNavigationNode (pANTLR3_COMMON_TREE_NODE_STREAM ctns, ANTLR3_UINT32 ttype); +static ANTLR3_BOOLEAN hasUniqueNavigationNodes (pANTLR3_COMMON_TREE_NODE_STREAM ctns); +static pANTLR3_BASE_TREE newDownNode (pANTLR3_COMMON_TREE_NODE_STREAM ctns); +static pANTLR3_BASE_TREE newUpNode (pANTLR3_COMMON_TREE_NODE_STREAM ctns); +static void reset (pANTLR3_COMMON_TREE_NODE_STREAM ctns); +static void push (pANTLR3_COMMON_TREE_NODE_STREAM ctns, ANTLR3_INT32 index); +static ANTLR3_INT32 pop (pANTLR3_COMMON_TREE_NODE_STREAM ctns); +//static ANTLR3_INT32 index (pANTLR3_COMMON_TREE_NODE_STREAM ctns); +static ANTLR3_UINT32 getLookaheadSize (pANTLR3_COMMON_TREE_NODE_STREAM ctns); +// TREE NODE STREAM API +// +static pANTLR3_BASE_TREE_ADAPTOR getTreeAdaptor (pANTLR3_TREE_NODE_STREAM tns); +static pANTLR3_BASE_TREE getTreeSource (pANTLR3_TREE_NODE_STREAM tns); +static pANTLR3_BASE_TREE _LT (pANTLR3_TREE_NODE_STREAM tns, ANTLR3_INT32 k); +static pANTLR3_BASE_TREE get (pANTLR3_TREE_NODE_STREAM tns, ANTLR3_INT32 k); +static void setUniqueNavigationNodes (pANTLR3_TREE_NODE_STREAM tns, ANTLR3_BOOLEAN uniqueNavigationNodes); +static pANTLR3_STRING toString (pANTLR3_TREE_NODE_STREAM tns); +static pANTLR3_STRING toStringSS (pANTLR3_TREE_NODE_STREAM tns, pANTLR3_BASE_TREE start, pANTLR3_BASE_TREE stop); +static void toStringWork (pANTLR3_TREE_NODE_STREAM tns, pANTLR3_BASE_TREE start, pANTLR3_BASE_TREE stop, pANTLR3_STRING buf); +static void replaceChildren (pANTLR3_TREE_NODE_STREAM tns, pANTLR3_BASE_TREE parent, ANTLR3_INT32 startChildIndex, ANTLR3_INT32 stopChildIndex, pANTLR3_BASE_TREE t); + +// INT STREAM API +// +static void consume (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER tindex (pANTLR3_INT_STREAM is); +static ANTLR3_UINT32 _LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i); +static ANTLR3_MARKER mark (pANTLR3_INT_STREAM is); +static 
void release (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void rewindMark (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void rewindLast (pANTLR3_INT_STREAM is); +static void seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index); +static ANTLR3_UINT32 size (pANTLR3_INT_STREAM is); + + +// Helper functions +// +static void fillBuffer (pANTLR3_COMMON_TREE_NODE_STREAM ctns, pANTLR3_BASE_TREE t); +static void fillBufferRoot (pANTLR3_COMMON_TREE_NODE_STREAM ctns); + +// Constructors +// +static void antlr3TreeNodeStreamFree (pANTLR3_TREE_NODE_STREAM tns); +static void antlr3CommonTreeNodeStreamFree (pANTLR3_COMMON_TREE_NODE_STREAM ctns); + +ANTLR3_API pANTLR3_TREE_NODE_STREAM +antlr3TreeNodeStreamNew() +{ + pANTLR3_TREE_NODE_STREAM stream; + + // Memory for the interface structure + // + stream = (pANTLR3_TREE_NODE_STREAM) ANTLR3_CALLOC(1, sizeof(ANTLR3_TREE_NODE_STREAM)); + + if (stream == NULL) + { + return NULL; + } + + // Install basic API + // + stream->replaceChildren = replaceChildren; + stream->free = antlr3TreeNodeStreamFree; + + return stream; +} + +static void +antlr3TreeNodeStreamFree(pANTLR3_TREE_NODE_STREAM stream) +{ + ANTLR3_FREE(stream); +} + +ANTLR3_API pANTLR3_COMMON_TREE_NODE_STREAM +antlr3CommonTreeNodeStreamNewTree(pANTLR3_BASE_TREE tree, ANTLR3_UINT32 hint) +{ + pANTLR3_COMMON_TREE_NODE_STREAM stream; + + stream = antlr3CommonTreeNodeStreamNew(tree->strFactory, hint); + + if (stream == NULL) + { + return NULL; + } + stream->root = tree; + + return stream; +} + +ANTLR3_API pANTLR3_COMMON_TREE_NODE_STREAM +antlr3CommonTreeNodeStreamNewStream(pANTLR3_COMMON_TREE_NODE_STREAM inStream) +{ + pANTLR3_COMMON_TREE_NODE_STREAM stream; + + // Memory for the interface structure + // + stream = (pANTLR3_COMMON_TREE_NODE_STREAM) ANTLR3_CALLOC(1, sizeof(ANTLR3_COMMON_TREE_NODE_STREAM)); + + if (stream == NULL) + { + return NULL; + } + + // Copy in all the reusable parts of the originating stream and create new + // pieces where necessary. 
+ // + + // String factory for tree walker + // + stream->stringFactory = inStream->stringFactory; + + // Create an adaptor for the common tree node stream + // + stream->adaptor = inStream->adaptor; + + // Create space for the tree node stream interface + // + stream->tnstream = antlr3TreeNodeStreamNew(); + + if (stream->tnstream == NULL) + { + stream->free (stream); + + return NULL; + } + + // Create space for the INT_STREAM interface + // + stream->tnstream->istream = antlr3IntStreamNew(); + + if (stream->tnstream->istream == NULL) + { + stream->tnstream->free (stream->tnstream); + stream->free (stream); + + return NULL; + } + + // Install the common tree node stream API + // + stream->addNavigationNode = addNavigationNode; + stream->hasUniqueNavigationNodes = hasUniqueNavigationNodes; + stream->newDownNode = newDownNode; + stream->newUpNode = newUpNode; + stream->reset = reset; + stream->push = push; + stream->pop = pop; + stream->getLookaheadSize = getLookaheadSize; + + stream->free = antlr3CommonTreeNodeStreamFree; + + // Install the tree node stream API + // + stream->tnstream->getTreeAdaptor = getTreeAdaptor; + stream->tnstream->getTreeSource = getTreeSource; + stream->tnstream->_LT = _LT; + stream->tnstream->setUniqueNavigationNodes = setUniqueNavigationNodes; + stream->tnstream->toString = toString; + stream->tnstream->toStringSS = toStringSS; + stream->tnstream->toStringWork = toStringWork; + stream->tnstream->get = get; + + // Install INT_STREAM interface + // + stream->tnstream->istream->consume = consume; + stream->tnstream->istream->index = tindex; + stream->tnstream->istream->_LA = _LA; + stream->tnstream->istream->mark = mark; + stream->tnstream->istream->release = release; + stream->tnstream->istream->rewind = rewindMark; + stream->tnstream->istream->rewindLast = rewindLast; + stream->tnstream->istream->seek = seek; + stream->tnstream->istream->size = size; + + // Initialize data elements of INT stream + // + stream->tnstream->istream->type = 
ANTLR3_COMMONTREENODE; + stream->tnstream->istream->super = (stream->tnstream); + + // Initialize data elements of TREE stream + // + stream->tnstream->ctns = stream; + + // Initialize data elements of the COMMON TREE NODE stream + // + stream->super = NULL; + stream->uniqueNavigationNodes = ANTLR3_FALSE; + stream->markers = NULL; + stream->nodeStack = inStream->nodeStack; + + // Create the node list map + // + stream->nodes = antlr3VectorNew(DEFAULT_INITIAL_BUFFER_SIZE); + stream->p = -1; + + // Install the navigation nodes + // + + // Install the navigation nodes + // + antlr3SetCTAPI(&(stream->UP)); + antlr3SetCTAPI(&(stream->DOWN)); + antlr3SetCTAPI(&(stream->EOF_NODE)); + antlr3SetCTAPI(&(stream->INVALID_NODE)); + + stream->UP.token = inStream->UP.token; + inStream->UP.token->strFactory = stream->stringFactory; + stream->DOWN.token = inStream->DOWN.token; + inStream->DOWN.token->strFactory = stream->stringFactory; + stream->EOF_NODE.token = inStream->EOF_NODE.token; + inStream->EOF_NODE.token->strFactory = stream->stringFactory; + stream->INVALID_NODE.token = inStream->INVALID_NODE.token; + inStream->INVALID_NODE.token->strFactory= stream->stringFactory; + + // Reuse the root tree of the originating stream + // + stream->root = inStream->root; + + // Signal that this is a rewriting stream so we don't + // free the originating tree. Anything that we rewrite or + // duplicate here will be done through the adaptor or + // the original tree factory. 
+ // + stream->isRewriter = ANTLR3_TRUE; + return stream; +} + +ANTLR3_API pANTLR3_COMMON_TREE_NODE_STREAM +antlr3CommonTreeNodeStreamNew(pANTLR3_STRING_FACTORY strFactory, ANTLR3_UINT32 hint) +{ + pANTLR3_COMMON_TREE_NODE_STREAM stream; + pANTLR3_COMMON_TOKEN token; + + // Memory for the interface structure + // + stream = (pANTLR3_COMMON_TREE_NODE_STREAM) ANTLR3_CALLOC(1, sizeof(ANTLR3_COMMON_TREE_NODE_STREAM)); + + if (stream == NULL) + { + return NULL; + } + + // String factory for tree walker + // + stream->stringFactory = strFactory; + + // Create an adaptor for the common tree node stream + // + stream->adaptor = ANTLR3_TREE_ADAPTORNew(strFactory); + + if (stream->adaptor == NULL) + { + stream->free(stream); + return NULL; + } + + // Create space for the tree node stream interface + // + stream->tnstream = antlr3TreeNodeStreamNew(); + + if (stream->tnstream == NULL) + { + stream->adaptor->free (stream->adaptor); + stream->free (stream); + + return NULL; + } + + // Create space for the INT_STREAM interface + // + stream->tnstream->istream = antlr3IntStreamNew(); + + if (stream->tnstream->istream == NULL) + { + stream->adaptor->free (stream->adaptor); + stream->tnstream->free (stream->tnstream); + stream->free (stream); + + return NULL; + } + + // Install the common tree node stream API + // + stream->addNavigationNode = addNavigationNode; + stream->hasUniqueNavigationNodes = hasUniqueNavigationNodes; + stream->newDownNode = newDownNode; + stream->newUpNode = newUpNode; + stream->reset = reset; + stream->push = push; + stream->pop = pop; + + stream->free = antlr3CommonTreeNodeStreamFree; + + // Install the tree node stream API + // + stream->tnstream->getTreeAdaptor = getTreeAdaptor; + stream->tnstream->getTreeSource = getTreeSource; + stream->tnstream->_LT = _LT; + stream->tnstream->setUniqueNavigationNodes = setUniqueNavigationNodes; + stream->tnstream->toString = toString; + stream->tnstream->toStringSS = toStringSS; + stream->tnstream->toStringWork = 
toStringWork; + stream->tnstream->get = get; + + // Install INT_STREAM interface + // + stream->tnstream->istream->consume = consume; + stream->tnstream->istream->index = tindex; + stream->tnstream->istream->_LA = _LA; + stream->tnstream->istream->mark = mark; + stream->tnstream->istream->release = release; + stream->tnstream->istream->rewind = rewindMark; + stream->tnstream->istream->rewindLast = rewindLast; + stream->tnstream->istream->seek = seek; + stream->tnstream->istream->size = size; + + // Initialize data elements of INT stream + // + stream->tnstream->istream->type = ANTLR3_COMMONTREENODE; + stream->tnstream->istream->super = (stream->tnstream); + + // Initialize data elements of TREE stream + // + stream->tnstream->ctns = stream; + + // Initialize data elements of the COMMON TREE NODE stream + // + stream->super = NULL; + stream->uniqueNavigationNodes = ANTLR3_FALSE; + stream->markers = NULL; + stream->nodeStack = antlr3StackNew(INITIAL_CALL_STACK_SIZE); + + // Create the node list map + // + if (hint == 0) + { + hint = DEFAULT_INITIAL_BUFFER_SIZE; + } + stream->nodes = antlr3VectorNew(hint); + stream->p = -1; + + // Install the navigation nodes + // + antlr3SetCTAPI(&(stream->UP)); + antlr3SetCTAPI(&(stream->DOWN)); + antlr3SetCTAPI(&(stream->EOF_NODE)); + antlr3SetCTAPI(&(stream->INVALID_NODE)); + + token = antlr3CommonTokenNew(ANTLR3_TOKEN_UP); + token->strFactory = strFactory; + token->textState = ANTLR3_TEXT_CHARP; + token->tokText.chars = (pANTLR3_UCHAR)"UP"; + stream->UP.token = token; + + token = antlr3CommonTokenNew(ANTLR3_TOKEN_DOWN); + token->strFactory = strFactory; + token->textState = ANTLR3_TEXT_CHARP; + token->tokText.chars = (pANTLR3_UCHAR)"DOWN"; + stream->DOWN.token = token; + + token = antlr3CommonTokenNew(ANTLR3_TOKEN_EOF); + token->strFactory = strFactory; + token->textState = ANTLR3_TEXT_CHARP; + token->tokText.chars = (pANTLR3_UCHAR)"EOF"; + stream->EOF_NODE.token = token; + + token = antlr3CommonTokenNew(ANTLR3_TOKEN_INVALID); + 
token->strFactory = strFactory; + token->textState = ANTLR3_TEXT_CHARP; + token->tokText.chars = (pANTLR3_UCHAR)"INVALID"; + stream->INVALID_NODE.token = token; + + + return stream; +} + +/// Free up any resources that belong to this common tree node stream. +/// +static void antlr3CommonTreeNodeStreamFree (pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + + // If this is a rewrting stream, then certain resources + // belong to the originating node stream and we do not + // free them here. + // + if (ctns->isRewriter != ANTLR3_TRUE) + { + ctns->adaptor ->free (ctns->adaptor); + + if (ctns->nodeStack != NULL) + { + ctns->nodeStack->free(ctns->nodeStack); + } + + ANTLR3_FREE(ctns->INVALID_NODE.token); + ANTLR3_FREE(ctns->EOF_NODE.token); + ANTLR3_FREE(ctns->DOWN.token); + ANTLR3_FREE(ctns->UP.token); + } + + if (ctns->nodes != NULL) + { + ctns->nodes ->free (ctns->nodes); + } + ctns->tnstream->istream ->free (ctns->tnstream->istream); + ctns->tnstream ->free (ctns->tnstream); + + + ANTLR3_FREE(ctns); +} + +// ------------------------------------------------------------------------------ +// Local helpers +// + +/// Walk and fill the tree node buffer from the root tree +/// +static void +fillBufferRoot(pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + // Call the generic buffer routine with the root as the + // argument + // + fillBuffer(ctns, ctns->root); + ctns->p = 0; // Indicate we are at buffer start +} + +/// Walk tree with depth-first-search and fill nodes buffer. +/// Don't add in DOWN, UP nodes if the supplied tree is a list (t is isNilNode) +// such as the root tree is. 
+/// +static void +fillBuffer(pANTLR3_COMMON_TREE_NODE_STREAM ctns, pANTLR3_BASE_TREE t) +{ + ANTLR3_BOOLEAN nilNode; + ANTLR3_UINT32 nCount; + ANTLR3_UINT32 c; + + nilNode = ctns->adaptor->isNilNode(ctns->adaptor, t); + + // If the supplied node is not a nil (list) node then we + // add in the node itself to the vector + // + if (nilNode == ANTLR3_FALSE) + { + ctns->nodes->add(ctns->nodes, t, NULL); + } + + // Only add a DOWN node if the tree is not a nil tree and + // the tree does have children. + // + nCount = t->getChildCount(t); + + if (nilNode == ANTLR3_FALSE && nCount>0) + { + ctns->addNavigationNode(ctns, ANTLR3_TOKEN_DOWN); + } + + // We always add any children the tree contains, which is + // a recursive call to this function, which will cause similar + // recursion and implement a depth first addition + // + for (c = 0; c < nCount; c++) + { + fillBuffer(ctns, ctns->adaptor->getChild(ctns->adaptor, t, c)); + } + + // If the tree had children and was not a nil (list) node, then we + // we need to add an UP node here to match the DOWN node + // + if (nilNode == ANTLR3_FALSE && nCount > 0) + { + ctns->addNavigationNode(ctns, ANTLR3_TOKEN_UP); + } +} + + +// ------------------------------------------------------------------------------ +// Interface functions +// + +/// Reset the input stream to the start of the input nodes. 
+/// +static void +reset (pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + if (ctns->p != -1) + { + ctns->p = 0; + } + ctns->tnstream->istream->lastMarker = 0; + + + // Free and reset the node stack only if this is not + // a rewriter, which is going to reuse the originating + // node streams node stack + // + if (ctns->isRewriter != ANTLR3_TRUE) + { + if (ctns->nodeStack != NULL) + { + ctns->nodeStack->free(ctns->nodeStack); + ctns->nodeStack = antlr3StackNew(INITIAL_CALL_STACK_SIZE); + } + } +} + + +static pANTLR3_BASE_TREE +LB(pANTLR3_TREE_NODE_STREAM tns, ANTLR3_INT32 k) +{ + if ( k==0) + { + return &(tns->ctns->INVALID_NODE.baseTree); + } + + if ( (tns->ctns->p - k) < 0) + { + return &(tns->ctns->INVALID_NODE.baseTree); + } + + return tns->ctns->nodes->get(tns->ctns->nodes, tns->ctns->p - k); +} + +/// Get tree node at current input pointer + i ahead where i=1 is next node. +/// i<0 indicates nodes in the past. So -1 is previous node and -2 is +/// two nodes ago. LT(0) is undefined. For i>=n, return null. +/// Return null for LT(0) and any index that results in an absolute address +/// that is negative. +/// +/// This is analogous to the _LT() method of the TokenStream, but this +/// returns a tree node instead of a token. Makes code gen identical +/// for both parser and tree grammars. :) +/// +static pANTLR3_BASE_TREE +_LT (pANTLR3_TREE_NODE_STREAM tns, ANTLR3_INT32 k) +{ + if (tns->ctns->p == -1) + { + fillBufferRoot(tns->ctns); + } + + if (k < 0) + { + return LB(tns, -k); + } + else if (k == 0) + { + return &(tns->ctns->INVALID_NODE.baseTree); + } + + // k was a legitimate request, + // + if (( tns->ctns->p + k - 1) >= (ANTLR3_INT32)(tns->ctns->nodes->count)) + { + return &(tns->ctns->EOF_NODE.baseTree); + } + + return tns->ctns->nodes->get(tns->ctns->nodes, tns->ctns->p + k - 1); +} + +/// Where is this stream pulling nodes from? This is not the name, but +/// the object that provides node objects. 
+/// +static pANTLR3_BASE_TREE +getTreeSource (pANTLR3_TREE_NODE_STREAM tns) +{ + return tns->ctns->root; +} + +/// Consume the next node from the input stream +/// +static void +consume (pANTLR3_INT_STREAM is) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_COMMON_TREE_NODE_STREAM ctns; + + tns = (pANTLR3_TREE_NODE_STREAM)(is->super); + ctns = tns->ctns; + + if (ctns->p == -1) + { + fillBufferRoot(ctns); + } + ctns->p++; +} + +static ANTLR3_UINT32 +_LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_BASE_TREE t; + + tns = (pANTLR3_TREE_NODE_STREAM)(is->super); + + // Ask LT for the 'token' at that position + // + t = tns->_LT(tns, i); + + if (t == NULL) + { + return ANTLR3_TOKEN_INVALID; + } + + // Token node was there so return the type of it + // + return t->getType(t); +} + +/// Mark the state of the input stream so that we can come back to it +/// after a syntactic predicate and so on. +/// +static ANTLR3_MARKER +mark (pANTLR3_INT_STREAM is) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_COMMON_TREE_NODE_STREAM ctns; + + tns = (pANTLR3_TREE_NODE_STREAM)(is->super); + ctns = tns->ctns; + + if (tns->ctns->p == -1) + { + fillBufferRoot(tns->ctns); + } + + // Return the current mark point + // + ctns->tnstream->istream->lastMarker = ctns->tnstream->istream->index(ctns->tnstream->istream); + + return ctns->tnstream->istream->lastMarker; +} + +static void +release (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ +} + +/// Rewind the current state of the tree walk to the state it +/// was in when mark() was called and it returned marker. Also, +/// wipe out the lookahead which will force reloading a few nodes +/// but it is better than making a copy of the lookahead buffer +/// upon mark(). +/// +static void +rewindMark (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ + is->seek(is, marker); +} + +static void +rewindLast (pANTLR3_INT_STREAM is) +{ + is->seek(is, is->lastMarker); +} + +/// consume() ahead until we hit index. 
Can't just jump ahead--must +/// spit out the navigation nodes. +/// +static void +seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_COMMON_TREE_NODE_STREAM ctns; + + tns = (pANTLR3_TREE_NODE_STREAM)(is->super); + ctns = tns->ctns; + + ctns->p = ANTLR3_UINT32_CAST(index); +} + +static ANTLR3_MARKER +tindex (pANTLR3_INT_STREAM is) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_COMMON_TREE_NODE_STREAM ctns; + + tns = (pANTLR3_TREE_NODE_STREAM)(is->super); + ctns = tns->ctns; + + return (ANTLR3_MARKER)(ctns->p); +} + +/// Expensive to compute the size of the whole tree while parsing. +/// This method only returns how much input has been seen so far. So +/// after parsing it returns true size. +/// +static ANTLR3_UINT32 +size (pANTLR3_INT_STREAM is) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_COMMON_TREE_NODE_STREAM ctns; + + tns = (pANTLR3_TREE_NODE_STREAM)(is->super); + ctns = tns->ctns; + + if (ctns->p == -1) + { + fillBufferRoot(ctns); + } + + return ctns->nodes->size(ctns->nodes); +} + +/// As we flatten the tree, we use UP, DOWN nodes to represent +/// the tree structure. When debugging we need unique nodes +/// so instantiate new ones when uniqueNavigationNodes is true. +/// +static void +addNavigationNode (pANTLR3_COMMON_TREE_NODE_STREAM ctns, ANTLR3_UINT32 ttype) +{ + pANTLR3_BASE_TREE node; + + node = NULL; + + if (ttype == ANTLR3_TOKEN_DOWN) + { + if (ctns->hasUniqueNavigationNodes(ctns) == ANTLR3_TRUE) + { + node = ctns->newDownNode(ctns); + } + else + { + node = &(ctns->DOWN.baseTree); + } + } + else + { + if (ctns->hasUniqueNavigationNodes(ctns) == ANTLR3_TRUE) + { + node = ctns->newUpNode(ctns); + } + else + { + node = &(ctns->UP.baseTree); + } + } + + // Now add the node we decided upon. 
+ // + ctns->nodes->add(ctns->nodes, node, NULL); +} + + +static pANTLR3_BASE_TREE_ADAPTOR +getTreeAdaptor (pANTLR3_TREE_NODE_STREAM tns) +{ + return tns->ctns->adaptor; +} + +static ANTLR3_BOOLEAN +hasUniqueNavigationNodes (pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + return ctns->uniqueNavigationNodes; +} + +static void +setUniqueNavigationNodes (pANTLR3_TREE_NODE_STREAM tns, ANTLR3_BOOLEAN uniqueNavigationNodes) +{ + tns->ctns->uniqueNavigationNodes = uniqueNavigationNodes; +} + + +/// Print out the entire tree including DOWN/UP nodes. Uses +/// a recursive walk. Mostly useful for testing as it yields +/// the token types not text. +/// +static pANTLR3_STRING +toString (pANTLR3_TREE_NODE_STREAM tns) +{ + + return tns->toStringSS(tns, tns->ctns->root, NULL); +} + +static pANTLR3_STRING +toStringSS (pANTLR3_TREE_NODE_STREAM tns, pANTLR3_BASE_TREE start, pANTLR3_BASE_TREE stop) +{ + pANTLR3_STRING buf; + + buf = tns->ctns->stringFactory->newRaw(tns->ctns->stringFactory); + + tns->toStringWork(tns, start, stop, buf); + + return buf; +} + +static void +toStringWork (pANTLR3_TREE_NODE_STREAM tns, pANTLR3_BASE_TREE p, pANTLR3_BASE_TREE stop, pANTLR3_STRING buf) +{ + + ANTLR3_UINT32 n; + ANTLR3_UINT32 c; + + if (!p->isNilNode(p) ) + { + pANTLR3_STRING text; + + text = p->toString(p); + + if (text == NULL) + { + text = tns->ctns->stringFactory->newRaw(tns->ctns->stringFactory); + + text->addc (text, ' '); + text->addi (text, p->getType(p)); + } + + buf->appendS(buf, text); + } + + if (p == stop) + { + return; /* Finished */ + } + + n = p->getChildCount(p); + + if (n > 0 && ! p->isNilNode(p) ) + { + buf->addc (buf, ' '); + buf->addi (buf, ANTLR3_TOKEN_DOWN); + } + + for (c = 0; c<n ; c++) + { + pANTLR3_BASE_TREE child; + + child = p->getChild(p, c); + tns->toStringWork(tns, child, stop, buf); + } + + if (n > 0 && ! 
p->isNilNode(p) ) + { + buf->addc (buf, ' '); + buf->addi (buf, ANTLR3_TOKEN_UP); + } +} + +static ANTLR3_UINT32 +getLookaheadSize (pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + return ctns->tail < ctns->head + ? (ctns->lookAheadLength - ctns->head + ctns->tail) + : (ctns->tail - ctns->head); +} + +static pANTLR3_BASE_TREE +newDownNode (pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + pANTLR3_COMMON_TREE dNode; + pANTLR3_COMMON_TOKEN token; + + token = antlr3CommonTokenNew(ANTLR3_TOKEN_DOWN); + token->textState = ANTLR3_TEXT_CHARP; + token->tokText.chars = (pANTLR3_UCHAR)"DOWN"; + dNode = antlr3CommonTreeNewFromToken(token); + + return &(dNode->baseTree); +} + +static pANTLR3_BASE_TREE +newUpNode (pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + pANTLR3_COMMON_TREE uNode; + pANTLR3_COMMON_TOKEN token; + + token = antlr3CommonTokenNew(ANTLR3_TOKEN_UP); + token->textState = ANTLR3_TEXT_CHARP; + token->tokText.chars = (pANTLR3_UCHAR)"UP"; + uNode = antlr3CommonTreeNewFromToken(token); + + return &(uNode->baseTree); +} + +/// Replace from start to stop child index of parent with t, which might +/// be a list. Number of children may be different +/// after this call. The stream is notified because it is walking the +/// tree and might need to know you are monkey-ing with the underlying +/// tree. Also, it might be able to modify the node stream to avoid +/// re-streaming for future phases. +/// +/// If parent is null, don't do anything; must be at root of overall tree. +/// Can't replace whatever points to the parent externally. Do nothing. 
+/// +static void +replaceChildren (pANTLR3_TREE_NODE_STREAM tns, pANTLR3_BASE_TREE parent, ANTLR3_INT32 startChildIndex, ANTLR3_INT32 stopChildIndex, pANTLR3_BASE_TREE t) +{ + if (parent != NULL) + { + pANTLR3_BASE_TREE_ADAPTOR adaptor; + pANTLR3_COMMON_TREE_ADAPTOR cta; + + adaptor = tns->getTreeAdaptor(tns); + cta = (pANTLR3_COMMON_TREE_ADAPTOR)(adaptor->super); + + adaptor->replaceChildren(adaptor, parent, startChildIndex, stopChildIndex, t); + } +} + +static pANTLR3_BASE_TREE +get (pANTLR3_TREE_NODE_STREAM tns, ANTLR3_INT32 k) +{ + if (tns->ctns->p == -1) + { + fillBufferRoot(tns->ctns); + } + + return tns->ctns->nodes->get(tns->ctns->nodes, k); +} + +static void +push (pANTLR3_COMMON_TREE_NODE_STREAM ctns, ANTLR3_INT32 index) +{ + ctns->nodeStack->push(ctns->nodeStack, ANTLR3_FUNC_PTR(ctns->p), NULL); // Save current index + ctns->tnstream->istream->seek(ctns->tnstream->istream, index); +} + +static ANTLR3_INT32 +pop (pANTLR3_COMMON_TREE_NODE_STREAM ctns) +{ + ANTLR3_INT32 retVal; + + retVal = ANTLR3_UINT32_CAST(ctns->nodeStack->pop(ctns->nodeStack)); + ctns->tnstream->istream->seek(ctns->tnstream->istream, retVal); + return retVal; +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3convertutf.c b/impl/antlr/libantlr3c-3.4/src/antlr3convertutf.c new file mode 100644 index 0000000..7c2f060 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3convertutf.c @@ -0,0 +1,532 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. 
hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Source code file. + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Sept 2001: fixed const & error conditions per + mods suggested by S. Parent & A. Lillich. + June 2002: Tim Dodd added detection and handling of incomplete + source sequences, enhanced error detection, added casts + to eliminate compiler warnings. + July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. + Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + + See the header file "ConvertUTF.h" for complete documentation. 
+ +------------------------------------------------------------------------ */ + + +#include "antlr3convertutf.h" + +#ifdef CVTUTF_DEBUG +#include <stdio.h> +#endif + + + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF16 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + if (target >= targetEnd) { + result = targetExhausted; break; + } + ch = *source++; + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_LEGAL_UTF32) { + if (flags == strictConversion) { + result = sourceIllegal; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + --source; /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF32 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF32* target = *targetStart; + UTF32 ch, ch2; + while (source < sourceEnd) { + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. */ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + if (target >= targetEnd) { + source = oldSource; /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + *target++ = ch; + } + *sourceStart = source; + *targetStart = target; +#ifdef CVTUTF_DEBUG +if (result == sourceIllegal) { + ANTLR3_FPRINTF(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); + fflush(stderr); +} +#endif + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Index into the table below with the first byte of a UTF-8 sequence to + * get the number of trailing bytes that are supposed to follow it. + * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is + * left as-is for anyone who may want to do such conversion, which was + * allowed in earlier algorithms. + */ +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* + * Magic values subtracted from a buffer value during UTF8 conversion. + * This table contains as many values as there might be trailing bytes + * in a UTF-8 sequence. + */ +static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequencs + * for *legal* UTF-8 will be 4 or fewer bytes total. 
+ */ +static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + +/* --------------------------------------------------------------------- */ + +/* The interface converts a whole buffer to avoid function-call overhead. + * Constants have been gathered. Loops & conditionals have been removed as + * much as possible for efficiency, in favor of drop-through switches. + * (See "Note A" at the bottom of the file for equivalent code.) + * If your compiler supports it, the "isLegalUTF8" call can be turned + * into an inline function. + */ + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF16toUTF8 ( + const UTF16** sourceStart, const UTF16* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF16* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ + ch = *source++; + /* If we have a surrogate pair, convert to UTF32 first. */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + /* If the 16 bits following the high surrogate are in the source buffer... */ + if (source < sourceEnd) { + UTF32 ch2 = *source; + /* If it's a low surrogate, convert to UTF32. */ + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + ++source; + } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } else { /* We don't have the 16 bits following the high surrogate. 
*/ + --source; /* return to the high surrogate */ + result = sourceExhausted; + break; + } + } else if (flags == strictConversion) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* Figure out how many bytes the result will require */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch < (UTF32)0x110000) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + } + + target += bytesToWrite; + if (target > targetEnd) { + source = oldSource; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. */ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8)(ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +/* + * Utility routine to tell whether a sequence of bytes is legal UTF-8. + * This must be called with the length pre-determined by the first byte. + * If not calling this from ConvertUTF8to*, then the length can be set by: + * length = trailingBytesForUTF8[*source]+1; + * and the sequence is illegal right away if there aren't that many bytes + * available. + * If presented with a length > 4, this returns false. The Unicode + * definition of UTF-8 goes up to 4-byte sequences. 
+ */ + +static ANTLR3_BOOLEAN +isLegalUTF8(const UTF8 *source, int length) { + UTF8 a; + const UTF8 *srcptr = source+length; + switch (length) { + default: return false; + /* Everything else falls through when "true"... */ + case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; + case 2: if ((a = (*--srcptr)) > 0xBF) return false; + + switch (*source) { + /* no fall-through in this inner switch */ + case 0xE0: if (a < 0xA0) return false; break; + case 0xED: if (a > 0x9F) return false; break; + case 0xF0: if (a < 0x90) return false; break; + case 0xF4: if (a > 0x8F) return false; break; + default: if (a < 0x80) return false; + } + + case 1: if (*source >= 0x80 && *source < 0xC2) return false; + } + if (*source > 0xF4) return false; + return true; +} + +/* --------------------------------------------------------------------- */ + +/* + * Exported function to return whether a UTF-8 sequence is legal or not. + * This is not used here; it's just exported. + */ +ANTLR3_BOOLEAN +isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { + int length = trailingBytesForUTF8[*source]+1; + if (source+length > sourceEnd) { + return false; + } + return isLegalUTF8(source, length); +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF16 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF16* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. 
See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */ + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */ + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = (UTF16)ch; /* normal case */ + } + } else if (ch > UNI_MAX_UTF16) { + if (flags == strictConversion) { + result = sourceIllegal; + source -= (extraBytesToRead+1); /* return to the start */ + break; /* Bail out; shouldn't continue */ + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + /* target is a character in range 0xFFFF - 0x10FFFF. */ + if (target + 1 >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up source pointer! 
*/ + result = targetExhausted; break; + } + ch -= halfBase; + *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); + *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF32toUTF8 ( + const UTF32** sourceStart, const UTF32* sourceEnd, + UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF32* source = *sourceStart; + UTF8* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch; + unsigned short bytesToWrite = 0; + const UTF32 byteMask = 0xBF; + const UTF32 byteMark = 0x80; + ch = *source++; + if (flags == strictConversion ) { + /* UTF-16 surrogate values are illegal in UTF-32 */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + --source; /* return to the illegal value itself */ + result = sourceIllegal; + break; + } + } + /* + * Figure out how many bytes the result will require. Turn any + * illegally large UTF32 things (> Plane 17) into replacement chars. + */ + if (ch < (UTF32)0x80) { bytesToWrite = 1; + } else if (ch < (UTF32)0x800) { bytesToWrite = 2; + } else if (ch < (UTF32)0x10000) { bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; + } else { bytesToWrite = 3; + ch = UNI_REPLACEMENT_CHAR; + result = sourceIllegal; + } + + target += bytesToWrite; + if (target > targetEnd) { + --source; /* Back up source pointer! */ + target -= bytesToWrite; result = targetExhausted; break; + } + switch (bytesToWrite) { /* note: everything falls through. 
*/ + case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); + } + target += bytesToWrite; + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- */ + +ConversionResult ConvertUTF8toUTF32 ( + const UTF8** sourceStart, const UTF8* sourceEnd, + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + ConversionResult result = conversionOK; + const UTF8* source = *sourceStart; + UTF32* target = *targetStart; + while (source < sourceEnd) { + UTF32 ch = 0; + unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; + if (source + extraBytesToRead >= sourceEnd) { + result = sourceExhausted; break; + } + /* Do this check whether lenient or strict */ + if (! isLegalUTF8(source, extraBytesToRead+1)) { + result = sourceIllegal; + break; + } + /* + * The cases all fall through. See "Note A" below. + */ + switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; + case 3: ch += *source++; ch <<= 6; + case 2: ch += *source++; ch <<= 6; + case 1: ch += *source++; ch <<= 6; + case 0: ch += *source++; + } + ch -= offsetsFromUTF8[extraBytesToRead]; + + if (target >= targetEnd) { + source -= (extraBytesToRead+1); /* Back up the source pointer! */ + result = targetExhausted; break; + } + if (ch <= UNI_MAX_LEGAL_UTF32) { + /* + * UTF-16 surrogate values are illegal in UTF-32, and anything + * over Plane 17 (> 0x10FFFF) is illegal. 
+ */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + if (flags == strictConversion) { + source -= (extraBytesToRead+1); /* return to the illegal value itself */ + result = sourceIllegal; + break; + } else { + *target++ = UNI_REPLACEMENT_CHAR; + } + } else { + *target++ = ch; + } + } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */ + result = sourceIllegal; + *target++ = UNI_REPLACEMENT_CHAR; + } + } + *sourceStart = source; + *targetStart = target; + return result; +} + +/* --------------------------------------------------------------------- + + Note A. + The fall-through switches in UTF-8 reading code save a + temp variable, some decrements & conditionals. The switches + are equivalent to the following loop: + { + int tmpBytesToRead = extraBytesToRead+1; + do { + ch += *source++; + --tmpBytesToRead; + if (tmpBytesToRead) ch <<= 6; + } while (tmpBytesToRead > 0); + } + In UTF-8 writing code, the switches on "bytesToWrite" are + similarly unrolled loops. + + --------------------------------------------------------------------- */ diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3cyclicdfa.c b/impl/antlr/libantlr3c-3.4/src/antlr3cyclicdfa.c new file mode 100644 index 0000000..82e7222 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3cyclicdfa.c @@ -0,0 +1,204 @@ +/** Support functions for traversing cyclic DFA states as laid + * out in static initialized structures by the code generator. + * + * A DFA implemented as a set of transition tables. + * + * Any state that has a semantic predicate edge is special; those states + * are generated with if-then-else structures in a ->specialStateTransition() + * which is generated by cyclicDFA template. + * + * There are at most 32767 states (16-bit signed short). + * Could get away with byte sometimes but would have to generate different + * types and the simulation code too. For a point of reference, the Java + * lexer's Tokens rule DFA has 326 states roughly. 
+ */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3defs.h> +#include <antlr3cyclicdfa.h> + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +static void +noViableAlt(pANTLR3_BASE_RECOGNIZER rec, pANTLR3_CYCLIC_DFA cdfa, ANTLR3_UINT32 s) +{ + // In backtracking mode, we just set the failed flag so that the + // alt can just exit right now. If we are parsing though, then + // we want the exception to be raised. 
+ // + if (rec->state->backtracking > 0) + { + rec->state->failed = ANTLR3_TRUE; + } + else + { + rec->exConstruct(rec); + rec->state->exception->type = ANTLR3_NO_VIABLE_ALT_EXCEPTION; + rec->state->exception->message = cdfa->description; + rec->state->exception->decisionNum = cdfa->decisionNumber; + rec->state->exception->state = s; + } +} + +/** From the input stream, predict what alternative will succeed + * using this DFA (representing the covering regular approximation + * to the underlying CFL). Return an alternative number 1..n. Throw + * an exception upon error. + */ +ANTLR3_API ANTLR3_INT32 +antlr3dfapredict (void * ctx, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_INT_STREAM is, pANTLR3_CYCLIC_DFA cdfa) +{ + ANTLR3_MARKER mark; + ANTLR3_INT32 s; + ANTLR3_INT32 specialState; + ANTLR3_INT32 c; + + mark = is->mark(is); /* Store where we are right now */ + s = 0; /* Always start with state 0 */ + + for (;;) + { + /* Pick out any special state entry for this state + */ + specialState = cdfa->special[s]; + + /* Transition the special state and consume an input token + */ + if (specialState >= 0) + { + s = cdfa->specialStateTransition(ctx, rec, is, cdfa, specialState); + + // Error? + // + if (s<0) + { + // If the predicate/rule raised an exception then we leave it + // in tact, else we have an NVA. + // + if (rec->state->error != ANTLR3_TRUE) + { + noViableAlt(rec,cdfa, s); + } + is->rewind(is, mark); + return 0; + } + is->consume(is); + continue; + } + + /* Accept state? + */ + if (cdfa->accept[s] >= 1) + { + is->rewind(is, mark); + return cdfa->accept[s]; + } + + /* Look for a normal transition state based upon the input token element + */ + c = is->_LA(is, 1); + + /* Check against min and max for this state + */ + if (c>= cdfa->min[s] && c <= cdfa->max[s]) + { + ANTLR3_INT32 snext; + + /* What is the next state? 
+ */ + snext = cdfa->transition[s][c - cdfa->min[s]]; + + if (snext < 0) + { + /* Was in range but not a normal transition + * must check EOT, which is like the else clause. + * eot[s]>=0 indicates that an EOT edge goes to another + * state. + */ + if (cdfa->eot[s] >= 0) + { + s = cdfa->eot[s]; + is->consume(is); + continue; + } + noViableAlt(rec,cdfa, s); + is->rewind(is, mark); + return 0; + } + + /* New current state - move to it + */ + s = snext; + is->consume(is); + continue; + } + /* EOT Transition? + */ + if (cdfa->eot[s] >= 0) + { + s = cdfa->eot[s]; + is->consume(is); + continue; + } + /* EOF transition to accept state? + */ + if ( c == ANTLR3_TOKEN_EOF && cdfa->eof[s] >= 0) + { + is->rewind(is, mark); + return cdfa->accept[cdfa->eof[s]]; + } + + /* No alt, so bomb + */ + noViableAlt(rec, cdfa, s); + is->rewind(is, mark); + return 0; + } + +} + +/** Default special state implementation + */ +ANTLR3_API ANTLR3_INT32 +antlr3dfaspecialStateTransition (void * ctx, pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_CYCLIC_DFA dfa, ANTLR3_INT32 s) +{ + return -1; +} + +/* Default special transition implementation + */ +ANTLR3_API ANTLR3_INT32 +antlr3dfaspecialTransition (void * ctx, pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_CYCLIC_DFA dfa, ANTLR3_INT32 s) +{ + return 0; +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3debughandlers.c b/impl/antlr/libantlr3c-3.4/src/antlr3debughandlers.c new file mode 100644 index 0000000..d5f177a --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3debughandlers.c @@ -0,0 +1,1047 @@ +/// \file +/// Provides the debugging functions invoked by a recognizer +/// built using the debug generator mode of the antlr tool. +/// See antlr3debugeventlistener.h for documentation. +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3.h> + +// Not everyone wishes to include the debugger stuff in their final deployment because +// it will then rely on being linked with the socket libraries. Hence if the programmer turns +// off the debugging, we do some dummy stuff that satisfies compilers etc but means there is +// no debugger and no reliance on the socket libraries.
If you set this flag, then using the -debug +// option to generate your code will produce code that just crashes, but then I presme you are smart +// enough to realize that building the libraries without debugger support means you can't call the +// debugger ;-) +// +#ifdef ANTLR3_NODEBUGGER +ANTLR3_API pANTLR3_DEBUG_EVENT_LISTENER +antlr3DebugListenerNew() +{ + ANTLR3_PRINTF("C runtime was compiled without debugger support. This program will crash!!"); + return NULL; +} +#else + +static ANTLR3_BOOLEAN handshake (pANTLR3_DEBUG_EVENT_LISTENER delboy); +static void enterRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, const char * grammarFileName, const char * ruleName); +static void enterAlt (pANTLR3_DEBUG_EVENT_LISTENER delboy, int alt); +static void exitRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, const char * grammarFileName, const char * ruleName); +static void enterSubRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, int decisionNumber); +static void exitSubRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, int decisionNumber); +static void enterDecision (pANTLR3_DEBUG_EVENT_LISTENER delboy, int decisionNumber); +static void exitDecision (pANTLR3_DEBUG_EVENT_LISTENER delboy, int decisionNumber); +static void consumeToken (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_COMMON_TOKEN t); +static void consumeHiddenToken (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_COMMON_TOKEN t); +static void LT (pANTLR3_DEBUG_EVENT_LISTENER delboy, int i, pANTLR3_COMMON_TOKEN t); +static void mark (pANTLR3_DEBUG_EVENT_LISTENER delboy, ANTLR3_MARKER marker); +static void rewindMark (pANTLR3_DEBUG_EVENT_LISTENER delboy, ANTLR3_MARKER marker); +static void rewindLast (pANTLR3_DEBUG_EVENT_LISTENER delboy); +static void beginBacktrack (pANTLR3_DEBUG_EVENT_LISTENER delboy, int level); +static void endBacktrack (pANTLR3_DEBUG_EVENT_LISTENER delboy, int level, ANTLR3_BOOLEAN successful); +static void location (pANTLR3_DEBUG_EVENT_LISTENER delboy, int line, int pos); +static void recognitionException 
(pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_EXCEPTION e); +static void beginResync (pANTLR3_DEBUG_EVENT_LISTENER delboy); +static void endResync (pANTLR3_DEBUG_EVENT_LISTENER delboy); +static void semanticPredicate (pANTLR3_DEBUG_EVENT_LISTENER delboy, ANTLR3_BOOLEAN result, const char * predicate); +static void commence (pANTLR3_DEBUG_EVENT_LISTENER delboy); +static void terminate (pANTLR3_DEBUG_EVENT_LISTENER delboy); +static void consumeNode (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t); +static void LTT (pANTLR3_DEBUG_EVENT_LISTENER delboy, int i, pANTLR3_BASE_TREE t); +static void nilNode (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t); +static void errorNode (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t); +static void createNode (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t); +static void createNodeTok (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE node, pANTLR3_COMMON_TOKEN token); +static void becomeRoot (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE newRoot, pANTLR3_BASE_TREE oldRoot); +static void addChild (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE root, pANTLR3_BASE_TREE child); +static void setTokenBoundaries (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t, ANTLR3_MARKER tokenStartIndex, ANTLR3_MARKER tokenStopIndex); +static void ack (pANTLR3_DEBUG_EVENT_LISTENER delboy); + +/// Create and initialize a new debug event listener that can be connected to +/// by ANTLRWorks and any other debugger via a socket. 
+/// +ANTLR3_API pANTLR3_DEBUG_EVENT_LISTENER +antlr3DebugListenerNew() +{ + pANTLR3_DEBUG_EVENT_LISTENER delboy; + + delboy = ANTLR3_CALLOC(1, sizeof(ANTLR3_DEBUG_EVENT_LISTENER)); + + if (delboy == NULL) + { + return NULL; + } + + // Initialize the API + // + delboy->addChild = addChild; + delboy->becomeRoot = becomeRoot; + delboy->beginBacktrack = beginBacktrack; + delboy->beginResync = beginResync; + delboy->commence = commence; + delboy->consumeHiddenToken = consumeHiddenToken; + delboy->consumeNode = consumeNode; + delboy->consumeToken = consumeToken; + delboy->createNode = createNode; + delboy->createNodeTok = createNodeTok; + delboy->endBacktrack = endBacktrack; + delboy->endResync = endResync; + delboy->enterAlt = enterAlt; + delboy->enterDecision = enterDecision; + delboy->enterRule = enterRule; + delboy->enterSubRule = enterSubRule; + delboy->exitDecision = exitDecision; + delboy->exitRule = exitRule; + delboy->exitSubRule = exitSubRule; + delboy->handshake = handshake; + delboy->location = location; + delboy->LT = LT; + delboy->LTT = LTT; + delboy->mark = mark; + delboy->nilNode = nilNode; + delboy->recognitionException = recognitionException; + delboy->rewind = rewindMark; + delboy->rewindLast = rewindLast; + delboy->semanticPredicate = semanticPredicate; + delboy->setTokenBoundaries = setTokenBoundaries; + delboy->terminate = terminate; + delboy->errorNode = errorNode; + + delboy->PROTOCOL_VERSION = 2; // ANTLR 3.1 is at protocol version 2 + + delboy->port = DEFAULT_DEBUGGER_PORT; + + return delboy; +} + +pANTLR3_DEBUG_EVENT_LISTENER +antlr3DebugListenerNewPort(ANTLR3_UINT32 port) +{ + pANTLR3_DEBUG_EVENT_LISTENER delboy; + + delboy = antlr3DebugListenerNew(); + + if (delboy != NULL) + { + delboy->port = port; + } + + return delboy; +} + +//-------------------------------------------------------------------------------- +// Support functions for sending stuff over the socket interface +// +static int +sockSend(SOCKET sock, const char * ptr, int len) 
+{ + int sent; + int thisSend; + + sent = 0; + + while (sent < len) + { + // Send as many bytes as we can + // + thisSend = send(sock, ptr, len - sent, 0); + + // Check for errors and tell the user if we got one + // + if (thisSend == -1) + { + return ANTLR3_FALSE; + } + + // Increment our offset by how many we were able to send + // + ptr += thisSend; + sent += thisSend; + } + return ANTLR3_TRUE; +} + +static ANTLR3_BOOLEAN +handshake (pANTLR3_DEBUG_EVENT_LISTENER delboy) +{ + /// Connection structure with which to wait and accept a connection from + /// a debugger. + /// + SOCKET serverSocket; + + // Connection structures to deal with the client after we accept the connection + // and the server while we accept a connection. + // + ANTLR3_SOCKADDRT client; + ANTLR3_SOCKADDRT server; + + // Buffer to construct our message in + // + char message[256]; + + // Specifies the length of the connection structure to accept() + // Windows use int, everyone else uses size_t + // + ANTLR3_SALENT sockaddr_len; + + // Option holder for setsockopt() + // + int optVal; + + if (delboy->initialized == ANTLR3_FALSE) + { + // Windows requires us to initialize WinSock. + // +#ifdef ANTLR3_WINDOWS + { + WORD wVersionRequested; + WSADATA wsaData; + int err; // Return code from WSAStartup + + // We must initialise the Windows socket system when the DLL is loaded. + // We are asking for Winsock 1.1 or better as we don't need anything + // too complicated for this. + // + wVersionRequested = MAKEWORD( 1, 1); + + err = WSAStartup( wVersionRequested, &wsaData ); + + if ( err != 0 ) + { + // Tell the user that we could not find a usable + // WinSock DLL + // + return FALSE; + } + } +#endif + + // Create the server socket, we are the server because we just wait until + // a debugger connects to the port we are listening on. 
+ // + serverSocket = socket(AF_INET, SOCK_STREAM, 0); + + if (serverSocket == INVALID_SOCKET) + { + return ANTLR3_FALSE; + } + + // Set the listening port + // + server.sin_port = htons((unsigned short)delboy->port); + server.sin_family = AF_INET; + server.sin_addr.s_addr = htonl (INADDR_ANY); + + // We could allow a rebind on the same addr/port pair I suppose, but + // I imagine that most people will just want to start debugging one parser at once. + // Maybe change this at some point, but rejecting the bind at this point will ensure + // that people realize they have left something running in the background. + // + if (bind(serverSocket, (pANTLR3_SOCKADDRC)&server, sizeof(server)) == -1) + { + return ANTLR3_FALSE; + } + + // We have bound the socket to the port and address so we now ask the TCP subsystem + // to start listening on that address/port + // + if (listen(serverSocket, 1) == -1) + { + // Some error, just fail + // + return ANTLR3_FALSE; + } + + // Now we can try to accept a connection on the port + // + sockaddr_len = sizeof(client); + delboy->socket = accept(serverSocket, (pANTLR3_SOCKADDRC)&client, &sockaddr_len); + + // Having accepted a connection, we can stop listening and close down the socket + // + shutdown (serverSocket, 0x02); + ANTLR3_CLOSESOCKET (serverSocket); + + if (delboy->socket == -1) + { + return ANTLR3_FALSE; + } + + // Disable Nagle as this is essentially a chat exchange + // + optVal = 1; + setsockopt(delboy->socket, SOL_SOCKET, TCP_NODELAY, (const void *)&optVal, sizeof(optVal)); + + } + + // We now have a good socket connection with the debugging client, so we + // send it the protocol version we are using and what the name of the grammar + // is that we represent. 
+ // + sprintf (message, "ANTLR %d\n", delboy->PROTOCOL_VERSION); + sockSend (delboy->socket, message, (int)strlen(message)); + sprintf (message, "grammar \"%s\n", delboy->grammarFileName->chars); + sockSend (delboy->socket, message, (int)strlen(message)); + ack (delboy); + + delboy->initialized = ANTLR3_TRUE; + + return ANTLR3_TRUE; +} + +// Send the supplied text and wait for an ack from the client +static void +transmit(pANTLR3_DEBUG_EVENT_LISTENER delboy, const char * ptr) +{ + sockSend(delboy->socket, ptr, (int)strlen(ptr)); + ack(delboy); +} + +static void +ack (pANTLR3_DEBUG_EVENT_LISTENER delboy) +{ + // Local buffer to read the next character in to + // + char buffer; + int rCount; + + // Ack terminates in a line feed, so we just wait for + // one of those. Speed is not of the essence so we don't need + // to buffer the input or anything. + // + do + { + rCount = recv(delboy->socket, &buffer, 1, 0); + } + while (rCount == 1 && buffer != '\n'); + + // If the socket ws closed on us, then we will get an error or + // (with a graceful close), 0. We can assume the the debugger stopped for some reason + // (such as Java crashing again). Therefore we just exit the program + // completely if we don't get the terminating '\n' for the ack. + // + if (rCount != 1) + { + ANTLR3_PRINTF("Exiting debugger as remote client closed the socket\n"); + ANTLR3_PRINTF("Received char count was %d, and last char received was %02X\n", rCount, buffer); + exit(0); + } +} + +// Given a buffer string and a source string, serialize the +// text, escaping any newlines and linefeeds. We have no need +// for speed here, this is the debugger. 
+// +void +serializeText(pANTLR3_STRING buffer, pANTLR3_STRING text) +{ + ANTLR3_UINT32 c; + ANTLR3_UCHAR character; + + // strings lead in with a " + // + buffer->append(buffer, "\t\""); + + if (text == NULL) + { + return; + } + + // Now we replace linefeeds, newlines and the escape + // leadin character '%' with their hex equivalents + // prefixed by '%' + // + for (c = 0; c < text->len; c++) + { + switch (character = text->charAt(text, c)) + { + case '\n': + + buffer->append(buffer, "%0A"); + break; + + case '\r': + + buffer->append(buffer, "%0D"); + break; + + case '\\': + + buffer->append(buffer, "%25"); + break; + + // Other characters: The Song Remains the Same. + // + default: + + buffer->addc(buffer, character); + break; + } + } +} + +// Given a token, create a stringified version of it, in the supplied +// buffer. We create a string for this in the debug 'object', if there +// is not one there already, and then reuse it here if asked to do this +// again. +// +pANTLR3_STRING +serializeToken(pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_COMMON_TOKEN t) +{ + // Do we already have a serialization buffer? + // + if (delboy->tokenString == NULL) + { + // No, so create one, using the string factory that + // the grammar name used, which is guaranteed to exist. + // 64 bytes will do us here for starters. + // + delboy->tokenString = delboy->grammarFileName->factory->newSize(delboy->grammarFileName->factory, 64); + } + + // Empty string + // + delboy->tokenString->set(delboy->tokenString, (const char *)""); + + // Now we serialize the elements of the token.Note that the debugger only + // uses 32 bits. 
+ // + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_INT32)(t->getTokenIndex(t))); + delboy->tokenString->addc(delboy->tokenString, '\t'); + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_INT32)(t->getType(t))); + delboy->tokenString->addc(delboy->tokenString, '\t'); + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_INT32)(t->getChannel(t))); + delboy->tokenString->addc(delboy->tokenString, '\t'); + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_INT32)(t->getLine(t))); + delboy->tokenString->addc(delboy->tokenString, '\t'); + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_INT32)(t->getCharPositionInLine(t))); + + // Now send the text that the token represents. + // + serializeText(delboy->tokenString, t->getText(t)); + + // Finally, as the debugger is a Java program it will expect to get UTF-8 + // encoded strings. We don't use UTF-8 internally to the C runtime, so we + // must force encode it. We have a method to do this in the string class, but + // it returns malloc space that we must free afterwards. + // + return delboy->tokenString->toUTF8(delboy->tokenString); +} + +// Given a tree node, create a stringified version of it in the supplied +// buffer. +// +pANTLR3_STRING +serializeNode(pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE node) +{ + pANTLR3_COMMON_TOKEN token; + + + // Do we already have a serialization buffer? + // + if (delboy->tokenString == NULL) + { + // No, so create one, using the string factory that + // the grammar name used, which is guaranteed to exist. + // 64 bytes will do us here for starters. + // + delboy->tokenString = delboy->grammarFileName->factory->newSize(delboy->grammarFileName->factory, 64); + } + + // Empty string + // + delboy->tokenString->set(delboy->tokenString, (const char *)""); + + // Protect against bugs/errors etc + // + if (node == NULL) + { + return delboy->tokenString; + } + + // Now we serialize the elements of the node.Note that the debugger only + // uses 32 bits. 
+ // + delboy->tokenString->addc(delboy->tokenString, '\t'); + + // Adaptor ID + // + delboy->tokenString->addi(delboy->tokenString, delboy->adaptor->getUniqueID(delboy->adaptor, node)); + delboy->tokenString->addc(delboy->tokenString, '\t'); + + // Type of the current token (which may be imaginary) + // + delboy->tokenString->addi(delboy->tokenString, delboy->adaptor->getType(delboy->adaptor, node)); + + // See if we have an actual token or just an imaginary + // + token = delboy->adaptor->getToken(delboy->adaptor, node); + + delboy->tokenString->addc(delboy->tokenString, '\t'); + if (token != NULL) + { + // Real token + // + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_INT32)(token->getLine(token))); + delboy->tokenString->addc(delboy->tokenString, ' '); + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_INT32)(token->getCharPositionInLine(token))); + } + else + { + // Imaginary tokens have no location + // + delboy->tokenString->addi(delboy->tokenString, -1); + delboy->tokenString->addc(delboy->tokenString, '\t'); + delboy->tokenString->addi(delboy->tokenString, -1); + } + + // Start Index of the node + // + delboy->tokenString->addc(delboy->tokenString, '\t'); + delboy->tokenString->addi(delboy->tokenString, (ANTLR3_UINT32)(delboy->adaptor->getTokenStartIndex(delboy->adaptor, node))); + + // Now send the text that the node represents. + // + serializeText(delboy->tokenString, delboy->adaptor->getText(delboy->adaptor, node)); + + // Finally, as the debugger is a Java program it will expect to get UTF-8 + // encoded strings. We don't use UTF-8 internally to the C runtime, so we + // must force encode it. 
We have a method to do this in the string class, but + // there is no utf8 string implementation as of yet + // + return delboy->tokenString->toUTF8(delboy->tokenString); +} + +//------------------------------------------------------------------------------------------------------------------ +// EVENTS +// +static void +enterRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, const char * grammarFileName, const char * ruleName) +{ + char buffer[512]; + + // Create the message (speed is not of the essence) + // + sprintf(buffer, "enterRule\t%s\t%s\n", grammarFileName, ruleName); + transmit(delboy, buffer); +} + +static void +enterAlt (pANTLR3_DEBUG_EVENT_LISTENER delboy, int alt) +{ + char buffer[512]; + + // Create the message (speed is not of the essence) + // + sprintf(buffer, "enterAlt\t%d\n", alt); + transmit(delboy, buffer); +} + +static void +exitRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, const char * grammarFileName, const char * ruleName) +{ + char buffer[512]; + + // Create the message (speed is not of the essence) + // + sprintf(buffer, "exitRule\t%s\t%s\n", grammarFileName, ruleName); + transmit(delboy, buffer); +} + +static void +enterSubRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, int decisionNumber) +{ + char buffer[512]; + + // Create the message (speed is not of the essence) + // + sprintf(buffer, "enterSubRule\t%d\n", decisionNumber); + transmit(delboy, buffer); +} + +static void +exitSubRule (pANTLR3_DEBUG_EVENT_LISTENER delboy, int decisionNumber) +{ + char buffer[512]; + + // Create the message (speed is not of the essence) + // + sprintf(buffer, "exitSubRule\t%d\n", decisionNumber); + transmit(delboy, buffer); +} + +static void +enterDecision (pANTLR3_DEBUG_EVENT_LISTENER delboy, int decisionNumber) +{ + char buffer[512]; + + // Create the message (speed is not of the essence) + // + sprintf(buffer, "enterDecision\t%d\n", decisionNumber); + transmit(delboy, buffer); + +} + +static void +exitDecision (pANTLR3_DEBUG_EVENT_LISTENER delboy, int 
decisionNumber) +{ + char buffer[512]; + + // Create the message (speed is not of the essence) + // + sprintf(buffer, "exitDecision\t%d\n", decisionNumber); + transmit(delboy, buffer); +} + +static void +consumeToken (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_COMMON_TOKEN t) +{ + pANTLR3_STRING msg; + + // Create the serialized token + // + msg = serializeToken(delboy, t); + + // Insert the debug event indicator + // + msg->insert8(msg, 0, "consumeToken\t"); + + msg->addc(msg, '\n'); + + // Transmit the message and wait for ack + // + transmit(delboy, (const char *)(msg->chars)); +} + +static void +consumeHiddenToken (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_COMMON_TOKEN t) +{ + pANTLR3_STRING msg; + + // Create the serialized token + // + msg = serializeToken(delboy, t); + + // Insert the debug event indicator + // + msg->insert8(msg, 0, "consumeHiddenToken\t"); + + msg->addc(msg, '\n'); + + // Transmit the message and wait for ack + // + transmit(delboy, (const char *)(msg->chars)); +} + +// Looking at the next token event. 
+// +static void +LT (pANTLR3_DEBUG_EVENT_LISTENER delboy, int i, pANTLR3_COMMON_TOKEN t) +{ + pANTLR3_STRING msg; + + if (t != NULL) + { + // Create the serialized token + // + msg = serializeToken(delboy, t); + + // Insert the index parameter + // + msg->insert8(msg, 0, "\t"); + msg->inserti(msg, 0, i); + + // Insert the debug event indicator + // + msg->insert8(msg, 0, "LT\t"); + + msg->addc(msg, '\n'); + + // Transmit the message and wait for ack + // + transmit(delboy, (const char *)(msg->chars)); + } +} + +static void +mark (pANTLR3_DEBUG_EVENT_LISTENER delboy, ANTLR3_MARKER marker) +{ + char buffer[128]; + + sprintf(buffer, "mark\t%d\n", (ANTLR3_UINT32)(marker & 0xFFFFFFFF)); + + // Transmit the message and wait for ack + // + transmit(delboy, buffer); +} + +static void +rewindMark (pANTLR3_DEBUG_EVENT_LISTENER delboy, ANTLR3_MARKER marker) +{ + char buffer[128]; + + sprintf(buffer, "rewind\t%d\n", (ANTLR3_UINT32)(marker & 0xFFFFFFFF)); + + // Transmit the message and wait for ack + // + transmit(delboy, buffer); + +} + +static void +rewindLast (pANTLR3_DEBUG_EVENT_LISTENER delboy) +{ + transmit(delboy, (const char *)"rewind\n"); +} + +static void +beginBacktrack (pANTLR3_DEBUG_EVENT_LISTENER delboy, int level) +{ + char buffer[128]; + + sprintf(buffer, "beginBacktrack\t%d\n", (ANTLR3_UINT32)(level & 0xFFFFFFFF)); + + // Transmit the message and wait for ack + // + transmit(delboy, buffer); +} + +static void +endBacktrack (pANTLR3_DEBUG_EVENT_LISTENER delboy, int level, ANTLR3_BOOLEAN successful) +{ + char buffer[128]; + + sprintf(buffer, "endBacktrack\t%d\t%d\n", level, successful); + + // Transmit the message and wait for ack + // + transmit(delboy, buffer); +} + +static void +location (pANTLR3_DEBUG_EVENT_LISTENER delboy, int line, int pos) +{ + char buffer[128]; + + sprintf(buffer, "location\t%d\t%d\n", line, pos); + + // Transmit the message and wait for ack + // + transmit(delboy, buffer); +} + +static void +recognitionException 
(pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_EXCEPTION e) +{ + char buffer[256]; + + sprintf(buffer, "exception\t%s\t%d\t%d\t%d\n", (char *)(e->name), (ANTLR3_INT32)(e->index), e->line, e->charPositionInLine); + + // Transmit the message and wait for ack + // + transmit(delboy, buffer); +} + +static void +beginResync (pANTLR3_DEBUG_EVENT_LISTENER delboy) +{ + transmit(delboy, (const char *)"beginResync\n"); +} + +static void +endResync (pANTLR3_DEBUG_EVENT_LISTENER delboy) +{ + transmit(delboy, (const char *)"endResync\n"); +} + +static void +semanticPredicate (pANTLR3_DEBUG_EVENT_LISTENER delboy, ANTLR3_BOOLEAN result, const char * predicate) +{ + unsigned char * buffer; + unsigned char * out; + + if (predicate != NULL) + { + buffer = (unsigned char *)ANTLR3_MALLOC(64 + 2*strlen(predicate)); + + if (buffer != NULL) + { + out = buffer + sprintf((char *)buffer, "semanticPredicate\t%s\t", result == ANTLR3_TRUE ? "true" : "false"); + + while (*predicate != '\0') + { + switch(*predicate) + { + case '\n': + + *out++ = '%'; + *out++ = '0'; + *out++ = 'A'; + break; + + case '\r': + + *out++ = '%'; + *out++ = '0'; + *out++ = 'D'; + break; + + case '%': + + *out++ = '%'; + *out++ = '0'; + *out++ = 'D'; + break; + + + default: + + *out++ = *predicate; + break; + } + + predicate++; + } + *out++ = '\n'; + *out++ = '\0'; + } + + // Send it and wait for the ack + // + transmit(delboy, (const char *)buffer); + } +} + +#ifdef ANTLR3_WINDOWS +#pragma warning (push) +#pragma warning (disable : 4100) +#endif + +static void +commence (pANTLR3_DEBUG_EVENT_LISTENER delboy) +{ + // Nothing to see here + // +} + +#ifdef ANTLR3_WINDOWS +#pragma warning (pop) +#endif + +static void +terminate (pANTLR3_DEBUG_EVENT_LISTENER delboy) +{ + // Terminate sequence + // + sockSend(delboy->socket, "terminate\n", 10); // Send out the command +} + +//---------------------------------------------------------------- +// Tree parsing events +// +static void +consumeNode (pANTLR3_DEBUG_EVENT_LISTENER 
delboy, pANTLR3_BASE_TREE t) +{ + pANTLR3_STRING buffer; + + buffer = serializeNode (delboy, t); + + // Now prepend the command + // + buffer->insert8 (buffer, 0, "consumeNode\t"); + buffer->addc (buffer, '\n'); + + // Send to the debugger and wait for the ack + // + transmit (delboy, (const char *)(delboy->tokenString->toUTF8(delboy->tokenString)->chars)); +} + +static void +LTT (pANTLR3_DEBUG_EVENT_LISTENER delboy, int i, pANTLR3_BASE_TREE t) +{ + pANTLR3_STRING buffer; + + buffer = serializeNode (delboy, t); + + // Now prepend the command + // + buffer->insert8 (buffer, 0, "\t"); + buffer->inserti (buffer, 0, i); + buffer->insert8 (buffer, 0, "LN\t"); + buffer->addc (buffer, '\n'); + + // Send to the debugger and wait for the ack + // + transmit (delboy, (const char *)(delboy->tokenString->toUTF8(delboy->tokenString)->chars)); +} + +static void +nilNode (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t) +{ + char buffer[128]; + sprintf(buffer, "nilNode\t%d\n", delboy->adaptor->getUniqueID(delboy->adaptor, t)); + transmit(delboy, buffer); +} + +static void +createNode (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t) +{ + // Do we already have a serialization buffer? + // + if (delboy->tokenString == NULL) + { + // No, so create one, using the string factory that + // the grammar name used, which is guaranteed to exist. + // 64 bytes will do us here for starters. + // + delboy->tokenString = delboy->grammarFileName->factory->newSize(delboy->grammarFileName->factory, 64); + } + + // Empty string + // + delboy->tokenString->set8(delboy->tokenString, (const char *)"createNodeFromTokenElements "); + + // Now we serialize the elements of the node.Note that the debugger only + // uses 32 bits. 
+ // + // Adaptor ID + // + delboy->tokenString->addi(delboy->tokenString, delboy->adaptor->getUniqueID(delboy->adaptor, t)); + delboy->tokenString->addc(delboy->tokenString, '\t'); + + // Type of the current token (which may be imaginary) + // + delboy->tokenString->addi(delboy->tokenString, delboy->adaptor->getType(delboy->adaptor, t)); + + // The text that this node represents + // + serializeText(delboy->tokenString, delboy->adaptor->getText(delboy->adaptor, t)); + delboy->tokenString->addc(delboy->tokenString, '\n'); + + // Finally, as the debugger is a Java program it will expect to get UTF-8 + // encoded strings. We don't use UTF-8 internally to the C runtime, so we + // must force encode it. We have a method to do this in the string class, but + // there is no utf8 string implementation as of yet + // + transmit(delboy, (const char *)(delboy->tokenString->toUTF8(delboy->tokenString)->chars)); + +} +static void +errorNode (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t) +{ + // Do we already have a serialization buffer? + // + if (delboy->tokenString == NULL) + { + // No, so create one, using the string factory that + // the grammar name used, which is guaranteed to exist. + // 64 bytes will do us here for starters. + // + delboy->tokenString = delboy->grammarFileName->factory->newSize(delboy->grammarFileName->factory, 64); + } + + // Empty string + // + delboy->tokenString->set8(delboy->tokenString, (const char *)"errorNode\t"); + + // Now we serialize the elements of the node.Note that the debugger only + // uses 32 bits. 
+ // + // Adaptor ID + // + delboy->tokenString->addi(delboy->tokenString, delboy->adaptor->getUniqueID(delboy->adaptor, t)); + delboy->tokenString->addc(delboy->tokenString, '\t'); + + // Type of the current token (which is an error) + // + delboy->tokenString->addi(delboy->tokenString, ANTLR3_TOKEN_INVALID); + + // The text that this node represents + // + serializeText(delboy->tokenString, delboy->adaptor->getText(delboy->adaptor, t)); + delboy->tokenString->addc(delboy->tokenString, '\n'); + + // Finally, as the debugger is a Java program it will expect to get UTF-8 + // encoded strings. We don't use UTF-8 internally to the C runtime, so we + // must force encode it. We have a method to do this in the string class, but + // there is no utf8 string implementation as of yet + // + transmit(delboy, (const char *)(delboy->tokenString->toUTF8(delboy->tokenString)->chars)); + +} + +static void +createNodeTok (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE node, pANTLR3_COMMON_TOKEN token) +{ + char buffer[128]; + + sprintf(buffer, "createNode\t%d\t%d\n", delboy->adaptor->getUniqueID(delboy->adaptor, node), (ANTLR3_UINT32)token->getTokenIndex(token)); + + transmit(delboy, buffer); +} + +static void +becomeRoot (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE newRoot, pANTLR3_BASE_TREE oldRoot) +{ + char buffer[128]; + + sprintf(buffer, "becomeRoot\t%d\t%d\n", delboy->adaptor->getUniqueID(delboy->adaptor, newRoot), + delboy->adaptor->getUniqueID(delboy->adaptor, oldRoot) + ); + transmit(delboy, buffer); +} + + +static void +addChild (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE root, pANTLR3_BASE_TREE child) +{ + char buffer[128]; + + sprintf(buffer, "addChild\t%d\t%d\n", delboy->adaptor->getUniqueID(delboy->adaptor, root), + delboy->adaptor->getUniqueID(delboy->adaptor, child) + ); + transmit(delboy, buffer); +} + +static void +setTokenBoundaries (pANTLR3_DEBUG_EVENT_LISTENER delboy, pANTLR3_BASE_TREE t, ANTLR3_MARKER tokenStartIndex, 
ANTLR3_MARKER tokenStopIndex) +{ + char buffer[128]; + + sprintf(buffer, "becomeRoot\t%d\t%d\t%d\n", delboy->adaptor->getUniqueID(delboy->adaptor, t), + (ANTLR3_UINT32)tokenStartIndex, + (ANTLR3_UINT32)tokenStopIndex + ); + transmit(delboy, buffer); +} +#endif + diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3encodings.c b/impl/antlr/libantlr3c-3.4/src/antlr3encodings.c new file mode 100644 index 0000000..c7a4b77 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3encodings.c @@ -0,0 +1,50 @@ +/** \File + * Provides basic utility functions to convert between + * the various Unicode character conversions. There are of + * course various packages that could be used instead of these + * functions, but then the Antlr 3 C runtime would be dependant + * on the particular package. Using ICU for this is a good idea if + * your project is already dependant on it. + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3.h> + +/// Convert 8 bit character to ANTLR char form. +/// +/// \param[in] inc Input character to transform from 8 bit form. +/// \return ANTLR3_UCHAR encoding of the character. +/// +ANTLR3_API +ANTLR3_UCHAR antlr3c8toAntlrc(ANTLR3_INT8 inc) +{ + return (ANTLR3_UCHAR)(inc); +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3exception.c b/impl/antlr/libantlr3c-3.4/src/antlr3exception.c new file mode 100644 index 0000000..339721c --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3exception.c @@ -0,0 +1,190 @@ +/** \file + * Contains default functions for creating and destroying as well as + * otherwise handling ANTLR3 standard exception structures. + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. 
The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3exception.h> + +static void antlr3ExceptionPrint(pANTLR3_EXCEPTION ex); +static void antlr3ExceptionFree (pANTLR3_EXCEPTION ex); + +/** + * \brief + * Creates a new ANTLR3 exception structure + * + * \param[in] exception + * One of the ANTLR3_xxx_EXCEPTION indicators such as #ANTLR3_RECOGNITION_EXCEPTION + * + * \param[in] message + * Pointer to message string + * + * \param[in] freeMessage + * Set to ANTLR3_TRUE if the message parameter should be freed by a call to + * ANTLR3_FREE() when the exception is destroyed. + * + * \returns + * Pointer to newly initialized exception structure, or an ANTLR3_ERR_xx defined value + * upon failure. + * + * An exception is 'thrown' by a recognizer when input is seen that is not predicted by + * the grammar productions or when some other error condition occurs. In C we do not have + * the luxury of try and catch blocks, so exceptions are added in the order they occur to + * a list in the baserecognizer structure. 
The last one to be thrown is inserted at the head of + * the list and the one currently installed is pointed to by the newly installed exception. + * + * \remarks + * After an exception is created, you may add a pointer to your own structure and a pointer + * to a function to free this structure when the exception is destroyed. + * + * \see + * ANTLR3_EXCEPTION + */ +pANTLR3_EXCEPTION +antlr3ExceptionNew(ANTLR3_UINT32 exception, void * name, void * message, ANTLR3_BOOLEAN freeMessage) +{ + pANTLR3_EXCEPTION ex; + + /* Allocate memory for the structure + */ + ex = (pANTLR3_EXCEPTION) ANTLR3_CALLOC(1, sizeof(ANTLR3_EXCEPTION)); + + /* Check for memory allocation + */ + if (ex == NULL) + { + return NULL; + } + + ex->name = name; /* Install exception name */ + ex->type = exception; /* Install the exception number */ + ex->message = message; /* Install message string */ + + /* Indicate whether the string should be freed if exception is destroyed + */ + ex->freeMessage = freeMessage; + + /* Install the API + */ + ex->print = antlr3ExceptionPrint; + ex->freeEx = antlr3ExceptionFree; + + return ex; +} + +/** + * \brief + * Prints out the message in all the exceptions in the supplied chain. + * + * \param[in] ex + * Pointer to the exception structure to print. + * + * \remarks + * You may wish to override this function by installing a pointer to a new function + * in the base recognizer context structure. 
+ * + * \see + * ANTLR3_BASE_RECOGNIZER + */ +static void +antlr3ExceptionPrint(pANTLR3_EXCEPTION ex) +{ + /* Ensure valid pointer + */ + while (ex != NULL) + { + /* Number if no message, else the message + */ + if (ex->message == NULL) + { + ANTLR3_FPRINTF(stderr, "ANTLR3_EXCEPTION number %d (%08X).\n", ex->type, ex->type); + } + else + { + ANTLR3_FPRINTF(stderr, "ANTLR3_EXCEPTION: %s\n", (char *)(ex->message)); + } + + /* Move to next in the chain (if any) + */ + ex = ex->nextException; + } + + return; +} + +/** + * \brief + * Frees up a chain of ANTLR3 exceptions + * + * \param[in] ex + * Pointer to the first exception in the chain to free. + * + * \see + * ANTLR3_EXCEPTION + */ +static void +antlr3ExceptionFree(pANTLR3_EXCEPTION ex) +{ + pANTLR3_EXCEPTION next; + + /* Ensure valid pointer + */ + while (ex != NULL) + { + /* Pick up anythign following now, before we free the + * current memory block. + */ + next = ex->nextException; + + /* Free the message pointer if advised to + */ + if (ex->freeMessage == ANTLR3_TRUE) + { + ANTLR3_FREE(ex->message); + } + + /* Call the programmer's custom free routine if advised to + */ + if (ex->freeCustom != NULL) + { + ex->freeCustom(ex->custom); + } + + /* Free the actual structure itself + */ + ANTLR3_FREE(ex); + + ex = next; + } + + return; +} + diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3filestream.c b/impl/antlr/libantlr3c-3.4/src/antlr3filestream.c new file mode 100644 index 0000000..4430ffe --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3filestream.c @@ -0,0 +1,474 @@ +/** \file + * \brief The ANTLR3 C filestream is used when the source character stream + * is a filesystem based input set and all the characters in the filestream + * can be loaded at once into memory and away the lexer goes. + * + * A number of initializers are provided in order that various character + * sets can be supported from input files. 
The ANTLR3 C runtime expects + * to deal with UTF32 characters only (the reasons for this are to + * do with the simplification of C code when using this form of Unicode + * encoding, though this is not a panacea. More information can be + * found on this by consulting: + * - http://www.unicode.org/versions/Unicode4.0.0/ch02.pdf#G11178 + * Where a well grounded discussion of the encoding formats available + * may be found. + * + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3.h> + +static void setupInputStream (pANTLR3_INPUT_STREAM input); +static pANTLR3_INPUT_STREAM antlr3CreateFileStream (pANTLR3_UINT8 fileName); +static pANTLR3_INPUT_STREAM antlr3CreateStringStream (pANTLR3_UINT8 data); + +ANTLR3_API pANTLR3_INPUT_STREAM +antlr3FileStreamNew(pANTLR3_UINT8 fileName, ANTLR3_UINT32 encoding) +{ + pANTLR3_INPUT_STREAM input; + + // First order of business is to read the file into some buffer space + // as just straight 8 bit bytes. Then we will work out the encoding and + // byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateFileStream(fileName); + if (input == NULL) + { + return NULL; + } + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. 
+ // + input->encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + setupInputStream(input); + + // Now we can set up the file name + // + input->istream->streamName = input->strFactory->newStr8(input->strFactory, fileName); + input->fileName = input->istream->streamName; + + return input; +} + + +ANTLR3_API pANTLR3_INPUT_STREAM +antlr3StringStreamNew(pANTLR3_UINT8 data, ANTLR3_UINT32 encoding, ANTLR3_UINT32 size, pANTLR3_UINT8 name) +{ + pANTLR3_INPUT_STREAM input; + + // First order of business is to set up the stream and install the data pointer. + // Then we will work out the encoding and byte order and adjust the API functions that are installed for the + // default 8Bit stream accordingly. + // + input = antlr3CreateStringStream(data); + if (input == NULL) + { + return NULL; + } + + // Size (in bytes) of the given 'string' + // + input->sizeBuf = size; + + // We have the data in memory now so we can deal with it according to + // the encoding scheme we were given by the user. + // + input->encoding = encoding; + + // Now we need to work out the endian type and install any + // API functions that differ from 8Bit + // + setupInputStream(input); + + // Now we can set up the file name + // + input->istream->streamName = input->strFactory->newStr8(input->strFactory, name); + input->fileName = input->istream->streamName; + + return input; +} + + +/// Determine endianess of the input stream and install the +/// API required for the encoding in that format. +/// +static void +setupInputStream(pANTLR3_INPUT_STREAM input) +{ + ANTLR3_BOOLEAN isBigEndian; + + // Used to determine the endianness of the machine we are currently + // running on. + // + ANTLR3_UINT16 bomTest = 0xFEFF; + + // What endianess is the machine we are running on? If the incoming + // encoding endianess is the same as this machine's natural byte order + // then we can use more efficient API calls. 
+ // + if (*((pANTLR3_UINT8)(&bomTest)) == 0xFE) + { + isBigEndian = ANTLR3_TRUE; + } + else + { + isBigEndian = ANTLR3_FALSE; + } + + // What encoding did the user tell us {s}he thought it was? I am going + // to get sick of the questions on antlr-interest, I know I am. + // + switch (input->encoding) + { + case ANTLR3_ENC_UTF8: + + // See if there is a BOM at the start of this UTF-8 sequence + // and just eat it if there is. Windows .TXT files have this for instance + // as it identifies UTF-8 even though it is of no consequence for byte order + // as UTF-8 does not have a byte order. + // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xEF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xBB + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xBF + ) + { + // The UTF8 BOM is present so skip it + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 3); + } + + // Install the UTF8 input routines + // + antlr3UTF8SetupStream(input); + break; + + case ANTLR3_ENC_UTF16: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine (or it is really UCS2). If there is a BOM we determine if the encoding + // is the same as the natural order of this machine. 
+ // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); + + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); + } + else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE + ) + { + // BOM present, indicates Little Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 2); + + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); + } + else + { + // No BOM present, assume local computer byte order + // + antlr3UTF16SetupStream(input, isBigEndian, isBigEndian); + } + break; + + case ANTLR3_ENC_UTF32: + + // See if there is a BOM at the start of the input. If not then + // we assume that the byte order is the natural order of this + // machine. If there is we determine if the encoding + // is the same as the natural order of this machine. 
+ // + if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+2)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+3)) == 0xFF + ) + { + // BOM Present, indicates Big Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); + + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); + } + else if ( (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar)) == 0xFF + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0xFE + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + && (ANTLR3_UINT8)(*((pANTLR3_UINT8)input->nextChar+1)) == 0x00 + ) + { + // BOM present, indicates Little Endian + // + input->nextChar = (void *)((pANTLR3_UINT8)input->nextChar + 4); + + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); + } + else + { + // No BOM present, assume local computer byte order + // + antlr3UTF32SetupStream(input, isBigEndian, isBigEndian); + } + break; + + case ANTLR3_ENC_UTF16BE: + + // Encoding is definately Big Endian with no BOM + // + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_TRUE); + break; + + case ANTLR3_ENC_UTF16LE: + + // Encoding is definately Little Endian with no BOM + // + antlr3UTF16SetupStream(input, isBigEndian, ANTLR3_FALSE); + break; + + case ANTLR3_ENC_UTF32BE: + + // Encoding is definately Big Endian with no BOM + // + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_TRUE); + break; + + case ANTLR3_ENC_UTF32LE: + + // Encoding is definately Little Endian with no BOM + // + antlr3UTF32SetupStream(input, isBigEndian, ANTLR3_FALSE); + break; + + case ANTLR3_ENC_EBCDIC: + + // EBCDIC is basically the same as ASCII but with an on the + // fly translation to ASCII + // + antlr3EBCDICSetupStream(input); + break; + + case ANTLR3_ENC_8BIT: + default: + + // Standard 8bit/ASCII + // + antlr38BitSetupStream(input); + break; + } +} + +/** \brief Use the contents of an 
operating system file as the input + * for an input stream. + * + * \param fileName Name of operating system file to read. + * \return + * - Pointer to new input stream context upon success + * - One of the ANTLR3_ERR_ defines on error. + */ +static pANTLR3_INPUT_STREAM +antlr3CreateFileStream(pANTLR3_UINT8 fileName) +{ + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + ANTLR3_UINT32 status; + + if (fileName == NULL) + { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM) + ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); + + if (input == NULL) + { + return NULL; + } + + // Structure was allocated correctly, now we can read the file. + // + status = antlr3read8Bit(input, fileName); + + // Call the common 8 bit input stream handler + // initialization. + // + antlr3GenericSetupStream(input); + + // However if the file was not there or something then we + // need to close. Have to wait until here as we cannot call + // close until the API is installed of course. + // + if (status != ANTLR3_SUCCESS) + { + input->close(input); + return NULL; + } + + return input; +} + +ANTLR3_API ANTLR3_UINT32 +antlr3read8Bit(pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 fileName) +{ + ANTLR3_FDSC infile; + ANTLR3_UINT32 fSize; + + /* Open the OS file in read binary mode + */ + infile = antlr3Fopen(fileName, "rb"); + + /* Check that it was there + */ + if (infile == NULL) + { + return (ANTLR3_UINT32)ANTLR3_ERR_NOFILE; + } + + /* It was there, so we can read the bytes now + */ + fSize = antlr3Fsize(fileName); /* Size of input file */ + + /* Allocate buffer for this input set + */ + input->data = ANTLR3_MALLOC((size_t)fSize); + input->sizeBuf = fSize; + + if (input->data == NULL) + { + return (ANTLR3_UINT32)ANTLR3_ERR_NOMEM; + } + + input->isAllocated = ANTLR3_TRUE; + + /* Now we read the file. 
Characters are not converted to + * the internal ANTLR encoding until they are read from the buffer + */ + antlr3Fread(infile, fSize, input->data); + + /* And close the file handle + */ + antlr3Fclose(infile); + + return ANTLR3_SUCCESS; +} + +/** \brief Open an operating system file and return the descriptor + * We just use the common open() and related functions here. + * Later we might find better ways on systems + * such as Windows and OpenVMS for instance. But the idea is to read the + * while file at once anyway, so it may be irrelevant. + */ +ANTLR3_API ANTLR3_FDSC +antlr3Fopen(pANTLR3_UINT8 filename, const char * mode) +{ + return (ANTLR3_FDSC)fopen((const char *)filename, mode); +} + +/** \brief Close an operating system file and free any handles + * etc. + */ +ANTLR3_API void +antlr3Fclose(ANTLR3_FDSC fd) +{ + fclose(fd); +} +ANTLR3_API ANTLR3_UINT32 +antlr3Fsize(pANTLR3_UINT8 fileName) +{ + struct _stat statbuf; + + _stat((const char *)fileName, &statbuf); + + return (ANTLR3_UINT32)statbuf.st_size; +} + +ANTLR3_API ANTLR3_UINT32 +antlr3Fread(ANTLR3_FDSC fdsc, ANTLR3_UINT32 count, void * data) +{ + return (ANTLR3_UINT32)fread(data, (size_t)count, 1, fdsc); +} + + +/** \brief Use the supplied 'string' as input to the stream + * + * \param data Pointer to the input data + * \return + * - Pointer to new input stream context upon success + * - NULL defines on error. 
+ */ +static pANTLR3_INPUT_STREAM +antlr3CreateStringStream(pANTLR3_UINT8 data) +{ + // Pointer to the input stream we are going to create + // + pANTLR3_INPUT_STREAM input; + + if (data == NULL) + { + return NULL; + } + + // Allocate memory for the input stream structure + // + input = (pANTLR3_INPUT_STREAM) + ANTLR3_CALLOC(1, sizeof(ANTLR3_INPUT_STREAM)); + + if (input == NULL) + { + return NULL; + } + + // Structure was allocated correctly, now we can install the pointer + // + input->data = data; + input->isAllocated = ANTLR3_FALSE; + + // Call the common 8 bit input stream handler + // initialization. + // + antlr3GenericSetupStream(input); + + return input; +}
\ No newline at end of file diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3inputstream.c b/impl/antlr/libantlr3c-3.4/src/antlr3inputstream.c new file mode 100644 index 0000000..e3f1c26 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3inputstream.c @@ -0,0 +1,2050 @@ +/// \file +/// Base functions to initialize and manipulate any input stream +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3input.h> + +// ----------------------------------- +// Generic 8 bit input such as latin-1 +// + +// 8Bit INT Stream API +// +static void antlr38BitConsume (pANTLR3_INT_STREAM is); +static ANTLR3_UCHAR antlr38BitLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static ANTLR3_UCHAR antlr38BitLA_ucase (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static ANTLR3_MARKER antlr38BitIndex (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER antlr38BitMark (pANTLR3_INT_STREAM is); +static void antlr38BitRewind (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark); +static void antlr38BitRewindLast (pANTLR3_INT_STREAM is); +static void antlr38BitRelease (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark); +static void antlr38BitSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint); +static pANTLR3_STRING antlr38BitGetSourceName (pANTLR3_INT_STREAM is); + +// 8Bit Charstream API functions +// +static void antlr3InputClose (pANTLR3_INPUT_STREAM input); +static void antlr3InputReset (pANTLR3_INPUT_STREAM input); +static void antlr38BitReuse (pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name); +static void * antlr38BitLT (pANTLR3_INPUT_STREAM input, ANTLR3_INT32 lt); +static ANTLR3_UINT32 antlr38BitSize (pANTLR3_INPUT_STREAM input); +static pANTLR3_STRING antlr38BitSubstr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop); +static ANTLR3_UINT32 antlr38BitGetLine (pANTLR3_INPUT_STREAM input); +static void * antlr38BitGetLineBuf (pANTLR3_INPUT_STREAM input); +static ANTLR3_UINT32 antlr38BitGetCharPosition (pANTLR3_INPUT_STREAM input); +static void antlr38BitSetLine (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 line); +static void antlr38BitSetCharPosition (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 position); +static void antlr38BitSetNewLineChar (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 newlineChar); +static void antlr38BitSetUcaseLA (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag); + +// ----------------------------------- +// UTF16 
(also covers UCS2) +// +// INT Stream API +// +static void antlr3UTF16Consume (pANTLR3_INT_STREAM is); +static ANTLR3_UCHAR antlr3UTF16LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static void antlr3UTF16ConsumeLE (pANTLR3_INT_STREAM is); +static ANTLR3_UCHAR antlr3UTF16LALE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static void antlr3UTF16ConsumeBE (pANTLR3_INT_STREAM is); +static ANTLR3_UCHAR antlr3UTF16LABE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static ANTLR3_MARKER antlr3UTF16Index (pANTLR3_INT_STREAM is); +static void antlr3UTF16Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint); + +// UTF16 Charstream API functions +// +static pANTLR3_STRING antlr3UTF16Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop); + +// ----------------------------------- +// UTF32 (also covers UCS2) +// +// INT Stream API +// +static void antlr3UTF32Consume (pANTLR3_INT_STREAM is); +static ANTLR3_UCHAR antlr3UTF32LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static ANTLR3_UCHAR antlr3UTF32LALE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static ANTLR3_UCHAR antlr3UTF32LABE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); +static ANTLR3_MARKER antlr3UTF32Index (pANTLR3_INT_STREAM is); +static void antlr3UTF32Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint); + +// UTF16 Charstream API functions +// +static pANTLR3_STRING antlr3UTF32Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop); + +// ------------------------------------ +// UTF-8 +// +static void antlr3UTF8Consume (pANTLR3_INT_STREAM is); +static ANTLR3_UCHAR antlr3UTF8LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); + +// ------------------------------------ +// EBCDIC +// +static ANTLR3_UCHAR antlr3EBCDICLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); + +/// \brief Common function to setup function interface for an 8 bit input stream. 
+/// +/// \param input Input stream context pointer +/// +/// \remark +/// - Many of the 8 bit oriented file stream handling functions will be usable +/// by any or at least some, other input streams. Therefore it is perfectly acceptable +/// to call this function to install the 8Bit handler then override just those functions +/// that would not work for the particular input encoding, such as consume for instance. +/// +void +antlr38BitSetupStream (pANTLR3_INPUT_STREAM input) +{ + // Build a string factory for this stream + // + input->strFactory = antlr3StringFactoryNew(input->encoding); + + // Default stream API set up is for 8Bit, so we are done + // +} + +void +antlr3GenericSetupStream (pANTLR3_INPUT_STREAM input) +{ + /* Install function pointers for an 8 bit input + */ + + /* Allocate stream interface + */ + input->istream = antlr3IntStreamNew(); + input->istream->type = ANTLR3_CHARSTREAM; + input->istream->super = input; + + /* Intstream API + */ + input->istream->consume = antlr38BitConsume; // Consume the next 8 bit character in the buffer + input->istream->_LA = antlr38BitLA; // Return the UTF32 character at offset n (1 based) + input->istream->index = antlr38BitIndex; // Current index (offset from first character + input->istream->mark = antlr38BitMark; // Record the current lex state for later restore + input->istream->rewind = antlr38BitRewind; // How to rewind the input + input->istream->rewindLast = antlr38BitRewindLast; // How to rewind the input + input->istream->seek = antlr38BitSeek; // How to seek to a specific point in the stream + input->istream->release = antlr38BitRelease; // Reset marks after mark n + input->istream->getSourceName = antlr38BitGetSourceName; // Return a string that names the input source + + /* Charstream API + */ + input->close = antlr3InputClose; // Close down the stream completely + input->free = antlr3InputClose; // Synonym for free + input->reset = antlr3InputReset; // Reset input to start + input->reuse = 
antlr38BitReuse; // Install a new input string and reset + input->_LT = antlr38BitLT; // Same as _LA for 8 bit file + input->size = antlr38BitSize; // Return the size of the input buffer + input->substr = antlr38BitSubstr; // Return a string from the input stream + input->getLine = antlr38BitGetLine; // Return the current line number in the input stream + input->getLineBuf = antlr38BitGetLineBuf; // Return a pointer to the start of the current line being consumed + input->getCharPositionInLine = antlr38BitGetCharPosition; // Return the offset into the current line of input + input->setLine = antlr38BitSetLine; // Set the input stream line number (does not set buffer pointers) + input->setCharPositionInLine = antlr38BitSetCharPosition; // Set the offset in to the current line (does not set any pointers) + input->SetNewLineChar = antlr38BitSetNewLineChar; // Set the value of the newline trigger character + input->setUcaseLA = antlr38BitSetUcaseLA; // Changes the LA function to return upper case always + + input->charByteSize = 1; // Size in bytes of characters in this stream. + + /* Initialize entries for tables etc + */ + input->markers = NULL; + + /* Set up the input stream brand new + */ + input->reset(input); + + /* Install default line separator character (it can be replaced + * by the grammar programmer later) + */ + input->SetNewLineChar(input, (ANTLR3_UCHAR)'\n'); +} + +static pANTLR3_STRING +antlr38BitGetSourceName(pANTLR3_INT_STREAM is) +{ + return is->streamName; +} + +/** \brief Close down an input stream and free any memory allocated by it. 
+ * + * \param input Input stream context pointer + */ +static void +antlr3InputClose(pANTLR3_INPUT_STREAM input) +{ + // Close any markers in the input stream + // + if (input->markers != NULL) + { + input->markers->free(input->markers); + input->markers = NULL; + } + + // Close the string factory + // + if (input->strFactory != NULL) + { + input->strFactory->close(input->strFactory); + } + + // Free the input stream buffer if we allocated it + // + if (input->isAllocated && input->data != NULL) + { + ANTLR3_FREE(input->data); + input->data = NULL; + } + + input->istream->free(input->istream); + + // Finally, free the space for the structure itself + // + ANTLR3_FREE(input); + + // Done + // +} + +static void +antlr38BitSetUcaseLA (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag) +{ + if (flag) + { + // Return the upper case version of the characters + // + input->istream->_LA = antlr38BitLA_ucase; + } + else + { + // Return the raw characters as they are in the buffer + // + input->istream->_LA = antlr38BitLA; + } +} + + +/** \brief Reset a re-startable input stream to the start + * + * \param input Input stream context pointer + */ +static void +antlr3InputReset(pANTLR3_INPUT_STREAM input) +{ + + input->nextChar = input->data; /* Input at first character */ + input->line = 1; /* starts at line 1 */ + input->charPositionInLine = -1; + input->currentLine = input->data; + input->markDepth = 0; /* Reset markers */ + + /* Clear out up the markers table if it is there + */ + if (input->markers != NULL) + { + input->markers->clear(input->markers); + } + else + { + /* Install a new markers table + */ + input->markers = antlr3VectorNew(0); + } +} + +/** Install a new source code in to a working input stream so that the + * input stream can be reused. 
+ */ +static void +antlr38BitReuse(pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name) +{ + input->isAllocated = ANTLR3_FALSE; + input->data = inString; + input->sizeBuf = size; + + // Now we can set up the file name. As we are reusing the stream, there may already + // be a string that we can reuse for holding the filename. + // + if (input->istream->streamName == NULL) + { + input->istream->streamName = input->strFactory->newStr(input->strFactory, name == NULL ? (pANTLR3_UINT8)"-memory-" : name); + input->fileName = input->istream->streamName; + } + else + { + input->istream->streamName->set(input->istream->streamName, (name == NULL ? (const char *)"-memory-" : (const char *)name)); + } + + input->reset(input); +} + +/** \brief Consume the next character in an 8 bit input stream + * + * \param input Input stream context pointer + */ +static void +antlr38BitConsume(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + /* Indicate one more character in this line + */ + input->charPositionInLine++; + + if ((ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar)) == input->newlineChar) + { + /* Reset for start of a new line of input + */ + input->line++; + input->charPositionInLine = 0; + input->currentLine = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); + } + + /* Increment to next character position + */ + input->nextChar = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); + } +} + +/** \brief Return the input element assuming an 8 bit ascii input + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +static ANTLR3_UCHAR +antlr38BitLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) 
(is->super)); + + if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + return (ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar + la - 1)); + } +} + +/** \brief Return the input element assuming an 8 bit input and + * always return the UPPER CASE character. + * Note that this is 8 bit and so we assume that the toupper + * function will use the correct locale for 8 bits. + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +static ANTLR3_UCHAR +antlr38BitLA_ucase (pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + return (ANTLR3_UCHAR)toupper((*((pANTLR3_UINT8)input->nextChar + la - 1))); + } +} + + +/** \brief Return the input element assuming an 8 bit ascii input + * + * \param[in] input Input stream context pointer + * \param[in] lt 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +static void * +antlr38BitLT(pANTLR3_INPUT_STREAM input, ANTLR3_INT32 lt) +{ + /* Casting is horrible but it means no warnings and LT should never be called + * on a character stream anyway I think. If it is then, the void * will need to be + * cast back in a similar manner. Yuck! But this means that LT for Token streams and + * tree streams is correct. + */ + return (ANTLR3_FUNC_PTR(input->istream->_LA(input->istream, lt))); +} + +/** \brief Calculate the current index in the output stream. 
+ * \param[in] input Input stream context pointer + */ +static ANTLR3_MARKER +antlr38BitIndex(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + return (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar)); +} + +/** \brief Return the size of the current input stream, as an 8Bit file + * which in this case is the total input. Other implementations may provide + * more sophisticated implementations to deal with non-recoverable streams + * and so on. + * + * \param[in] input Input stream context pointer + */ +static ANTLR3_UINT32 +antlr38BitSize(pANTLR3_INPUT_STREAM input) +{ + return input->sizeBuf; +} + +/** \brief Mark the current input point in an 8Bit 8 bit stream + * such as a file stream, where all the input is available in the + * buffer. + * + * \param[in] is Input stream context pointer + */ +static ANTLR3_MARKER +antlr38BitMark (pANTLR3_INT_STREAM is) +{ + pANTLR3_LEX_STATE state; + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + /* New mark point + */ + input->markDepth++; + + /* See if we are revisiting a mark as we can just reuse the vector + * entry if we are, otherwise, we need a new one + */ + if (input->markDepth > input->markers->count) + { + state = ANTLR3_MALLOC(sizeof(ANTLR3_LEX_STATE)); + + /* Add it to the table + */ + input->markers->add(input->markers, state, ANTLR3_FREE_FUNC); /* No special structure, just free() on delete */ + } + else + { + state = (pANTLR3_LEX_STATE)input->markers->get(input->markers, input->markDepth - 1); + + /* Assume no errors for speed, it will just blow up if the table failed + * for some reasons, hence lots of unit tests on the tables ;-) + */ + } + + /* We have created or retrieved the state, so update it with the current + * elements of the lexer state. 
+ */ + state->charPositionInLine = input->charPositionInLine; + state->currentLine = input->currentLine; + state->line = input->line; + state->nextChar = input->nextChar; + + is->lastMarker = input->markDepth; + + /* And that's it + */ + return input->markDepth; +} +/** \brief Rewind the lexer input to the state specified by the last produced mark. + * + * \param[in] input Input stream context pointer + * + * \remark + * Assumes 8 Bit input stream. + */ +static void +antlr38BitRewindLast (pANTLR3_INT_STREAM is) +{ + is->rewind(is, is->lastMarker); +} + +/** \brief Rewind the lexer input to the state specified by the supplied mark. + * + * \param[in] input Input stream context pointer + * + * \remark + * Assumes 8 Bit input stream. + */ +static void +antlr38BitRewind (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) +{ + pANTLR3_LEX_STATE state; + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) is->super); + + /* Perform any clean up of the marks + */ + input->istream->release(input->istream, mark); + + /* Find the supplied mark state + */ + state = (pANTLR3_LEX_STATE)input->markers->get(input->markers, (ANTLR3_UINT32)(mark - 1)); + + /* Seek input pointer to the requested point (note we supply the void *pointer + * to whatever is implementing the int stream to seek). + */ + antlr38BitSeek(is, (ANTLR3_MARKER)(state->nextChar)); + + /* Reset to the reset of the information in the mark + */ + input->charPositionInLine = state->charPositionInLine; + input->currentLine = state->currentLine; + input->line = state->line; + input->nextChar = state->nextChar; + + /* And we are done + */ +} + +/** \brief Rewind the lexer input to the state specified by the supplied mark. + * + * \param[in] input Input stream context pointer + * + * \remark + * Assumes 8 Bit input stream. 
+ */ +static void +antlr38BitRelease (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + /* We don't do much here in fact as we never free any higher marks in + * the hashtable as we just resuse any memory allocated for them. + */ + input->markDepth = (ANTLR3_UINT32)(mark - 1); +} + +/** \brief Rewind the lexer input to the state specified by the supplied mark. + * + * \param[in] input Input stream context pointer + * + * \remark + * Assumes 8 Bit input stream. + */ +static void +antlr38BitSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) +{ + ANTLR3_INT32 count; + pANTLR3_INPUT_STREAM input; + + input = ANTLR3_FUNC_PTR(((pANTLR3_INPUT_STREAM) is->super)); + + /* If the requested seek point is less than the current + * input point, then we assume that we are resetting from a mark + * and do not need to scan, but can just set to there. + */ + if (seekPoint <= (ANTLR3_MARKER)(input->nextChar)) + { + input->nextChar = ((pANTLR3_UINT8) seekPoint); + } + else + { + count = (ANTLR3_UINT32)(seekPoint - (ANTLR3_MARKER)(input->nextChar)); + + while (count--) + { + is->consume(is); + } + } +} +/** Return a substring of the 8 bit input stream in + * newly allocated memory. + * + * \param input Input stream context pointer + * \param start Offset in input stream where the string starts + * \param stop Offset in the input stream where the string ends. + */ +static pANTLR3_STRING +antlr38BitSubstr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop) +{ + return input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, (ANTLR3_UINT32)(stop - start + 1)); +} + +/** \brief Return the line number as understood by the 8 bit input stream. + * + * \param input Input stream context pointer + * \return Line number in input stream that we believe we are working on. 
+ */ +static ANTLR3_UINT32 +antlr38BitGetLine (pANTLR3_INPUT_STREAM input) +{ + return input->line; +} + +/** Return a pointer into the input stream that points at the start + * of the current input line as triggered by the end of line character installed + * for the stream ('\n' unless told differently). + * + * \param[in] input + */ +static void * +antlr38BitGetLineBuf (pANTLR3_INPUT_STREAM input) +{ + return input->currentLine; +} + +/** Return the current offset in to the current line in the input stream. + * + * \param input Input stream context pointer + * \return Current line offset + */ +static ANTLR3_UINT32 +antlr38BitGetCharPosition (pANTLR3_INPUT_STREAM input) +{ + return input->charPositionInLine; +} + +/** Set the current line number as understood by the input stream. + * + * \param input Input stream context pointer + * \param line Line number to tell the input stream we are on + * + * \remark + * This function does not change any pointers, it just allows the programmer to set the + * line number according to some external criterion, such as finding a lexed directive + * like: #nnn "file.c" for instance, such that error reporting and so on in is in sync + * with some original source format. + */ +static void +antlr38BitSetLine (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 line) +{ + input->line = line; +} + +/** Set the current offset in the current line to be a particular setting. + * + * \param[in] input Input stream context pointer + * \param[in] position New setting for current offset. + * + * \remark + * This does not set the actual pointers in the input stream, it is purely for reporting + * purposes and so on as per antlr38BitSetLine(); + */ +static void +antlr38BitSetCharPosition (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 position) +{ + input->charPositionInLine = position; +} + +/** Set the newline trigger character in the input stream to the supplied parameter. 
+ * + * \param[in] input Input stream context pointer + * \param[in] newlineChar Character to set to be the newline trigger. + * + * \remark + * - The supplied newLineChar is in UTF32 encoding (which means ASCII and latin1 etc + * are the same encodings), but the input stream catered to by this function is 8 bit + * only, so it is up to the programmer to ensure that the character supplied is valid. + */ +static void +antlr38BitSetNewLineChar (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 newlineChar) +{ + input->newlineChar = newlineChar; +} + + +/// \brief Common function to setup function interface for a UTF16 or UCS2 input stream. +/// +/// \param input Input stream context pointer +/// +/// \remark +/// - Strictly speaking, there is no such thing as a UCS2 input stream as the term +/// tends to confuse the notions of character encoding, unicode and so on. UCS2 is +/// essentially UTF16 without any surrogates and so the standard UTF16 +/// input stream is able to handle it without any special code. +/// +void +antlr3UTF16SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian) +{ + // Build a string factory for this stream. This is a UTF16 string factory which is a standard + // part of the ANTLR3 string. The string factory is then passed through the whole chain + // of lexer->parser->tree->treeparser and so on. + // + input->strFactory = antlr3StringFactoryNew(input->encoding); + + // Generic API that does not care about endianess. + // + input->istream->index = antlr3UTF16Index; // Calculate current index in input stream, UTF16 based + input->substr = antlr3UTF16Substr; // Return a string from the input stream + input->istream->seek = antlr3UTF16Seek; // How to seek to a specific point in the stream + + // We must install different UTF16 routines according to whether the input + // is the same endianess as the machine we are executing upon or not. 
If it is not + // then we must install methods that can convert the endianess on the fly as they go + // + + switch (machineBigEndian) + { + case ANTLR3_TRUE: + + // Machine is Big Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. + // + if (inputBigEndian == ANTLR3_TRUE) + { + // Input is machine compatible + // + input->istream->consume = antlr3UTF16Consume; // Consume the next UTF16 character in the buffer + input->istream->_LA = antlr3UTF16LA; // Return the UTF32 character at offset n (1 based) + } + else + { + // Need to use methods that know that the input is little endian + // + input->istream->consume = antlr3UTF16ConsumeLE; // Consume the next UTF16 character in the buffer + input->istream->_LA = antlr3UTF16LALE; // Return the UTF32 character at offset n (1 based) + } + break; + + case ANTLR3_FALSE: + + // Machine is Little Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. + // + if (inputBigEndian == ANTLR3_FALSE) + { + // Input is machine compatible + // + input->istream->consume = antlr3UTF16Consume; // Consume the next UTF16 character in the buffer + input->istream->_LA = antlr3UTF16LA; // Return the UTF32 character at offset n (1 based) + } + else + { + // Need to use methods that know that the input is Big Endian + // + input->istream->consume = antlr3UTF16ConsumeBE; // Consume the next UTF16 character in the buffer + input->istream->_LA = antlr3UTF16LABE; // Return the UTF32 character at offset n (1 based) + } + break; + } + + + input->charByteSize = 2; // Size in bytes of characters in this stream. 
+ +} + +/// \brief Consume the next character in a UTF16 input stream +/// +/// \param input Input stream context pointer +/// +static void +antlr3UTF16Consume(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + UTF32 ch; + UTF32 ch2; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + // Buffer size is always in bytes + // + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Indicate one more character in this line + // + input->charPositionInLine++; + + if ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar) + { + // Reset for start of a new line of input + // + input->line++; + input->charPositionInLine = 0; + input->currentLine = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in natural machine byte order + // + ch = *((UTF16*)input->nextChar); + + // We consumed one 16 bit character + // + input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Next character is in natural machine byte order + // + ch2 = *((UTF16*)input->nextChar); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } +} + +/// \brief Return the input element assuming an 8 bit ascii input +/// +/// \param[in] input Input stream context pointer +/// \param[in] la 1 based offset of next input stream element +/// +/// \return Next input character in internal ANTLR3 encoding (UTF32) +/// +static ANTLR3_UCHAR +antlr3UTF16LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + UTF32 ch; + UTF32 ch2; + UTF16 * nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // + input = ((pANTLR3_INPUT_STREAM) (is->super)); + nextChar = input->nextChar; + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf ) + { + // Advance our copy of the input pointer + // + // Next char in natural machine byte order + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar++; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data ) + { + // Get the previous 16 bit character + // + ch = *--nextChar; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) + { + ch2 = *(nextChar-1); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar--; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 character (native machine byte order) + // + ch = *nextChar++; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... 
+ // + if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Next character is in natural machine byte order + // + ch2 = *nextChar; + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + + +/// \brief Calculate the current index in the output stream. +/// \param[in] input Input stream context pointer +/// +static ANTLR3_MARKER +antlr3UTF16Index(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + return (ANTLR3_MARKER)(input->nextChar); +} + +/// \brief Rewind the lexer input to the state specified by the supplied mark. +/// +/// \param[in] input Input stream context pointer +/// +/// \remark +/// Assumes UTF16 input stream. +/// +static void +antlr3UTF16Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) is->super); + + // If the requested seek point is less than the current + // input point, then we assume that we are resetting from a mark + // and do not need to scan, but can just set to there as rewind will + // reset line numbers and so on. + // + if (seekPoint <= (ANTLR3_MARKER)(input->nextChar)) + { + input->nextChar = (void *)seekPoint; + } + else + { + // Call consume until we reach the asked for seek point or EOF + // + while (is->_LA(is, 1) != ANTLR3_CHARSTREAM_EOF && seekPoint < (ANTLR3_MARKER)input->nextChar) + { + is->consume(is); + } + } +} +/// \brief Return a substring of the UTF16 input stream in +/// newly allocated memory. 
+/// +/// \param input Input stream context pointer +/// \param start Offset in input stream where the string starts +/// \param stop Offset in the input stream where the string ends. +/// +static pANTLR3_STRING +antlr3UTF16Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop) +{ + return input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, ((ANTLR3_UINT32_CAST(stop - start))/2) + 1); +} + +/// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not +/// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance +/// sake, we assume it is validly encoded. So if a low surrogate is found at the curent input position then we +/// just consume it. Surrogate pairs should be seen as Hi, Lo. So if we have a Lo first, then the input stream +/// is fubar but we just ignore that. +/// +/// \param input Input stream context pointer +/// +static void +antlr3UTF16ConsumeLE(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + UTF32 ch; + UTF32 ch2; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + // Buffer size is always in bytes + // + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Indicate one more character in this line + // + input->charPositionInLine++; + + if ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar) + { + // Reset for start of a new line of input + // + input->line++; + input->charPositionInLine = 0; + input->currentLine = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in litle endian form + // + ch = *((pANTLR3_UINT8)input->nextChar) + (*((pANTLR3_UINT8)input->nextChar + 1) <<8); + + // We consumed one 16 bit character + // + input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + + // If we have a surrogate pair then we need to 
consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + ch2 = *((pANTLR3_UINT8)input->nextChar) + (*((pANTLR3_UINT8)input->nextChar + 1) <<8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // + } +} + +/// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not +/// +/// \param[in] input Input stream context pointer +/// \param[in] la 1 based offset of next input stream element +/// +/// \return Next input character in internal ANTLR3 encoding (UTF32) +/// +static ANTLR3_UCHAR +antlr3UTF16LALE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + UTF32 ch; + UTF32 ch2; + pANTLR3_UCHAR nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // + input = ((pANTLR3_INPUT_STREAM) (is->super)); + nextChar = input->nextChar; + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf ) + { + // Advance our copy of the input pointer + // + // Next char in Little Endian byte order + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar += 2; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. 
We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data ) + { + // Get the previous 16 bit character + // + ch = (*nextChar - 2) + ((*nextChar -1) << 8); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) + { + ch2 = (*nextChar - 2) + ((*nextChar -1) << 8); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 character (little endian byte order) + // + ch = (*nextChar) + (*(nextChar+1) << 8); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Next character is in little endian byte order + // + ch2 = (*nextChar) + (*(nextChar+1) << 8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +/// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not +/// +/// \param input Input stream context pointer +/// +static void +antlr3UTF16ConsumeBE(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + UTF32 ch; + UTF32 ch2; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + // Buffer size is always in bytes + // + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Indicate one more character in this line + // + input->charPositionInLine++; + + if ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar) + { + // Reset for start of a new line of input + // + input->line++; + input->charPositionInLine = 0; + input->currentLine = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + } + + // Increment to next character position, accounting for any surrogates + // + // Next char in big endian form + // + ch = *((pANTLR3_UINT8)input->nextChar + 1) + (*((pANTLR3_UINT8)input->nextChar ) <<8); + + // We consumed one 16 bit character + // + input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + + // If the 16 bits following the high surrogate are in the source buffer... 
+ // + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Big endian + // + ch2 = *((pANTLR3_UINT8)input->nextChar + 1) + (*((pANTLR3_UINT8)input->nextChar ) <<8); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. + // + } +} + +/// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not +/// +/// \param[in] input Input stream context pointer +/// \param[in] la 1 based offset of next input stream element +/// +/// \return Next input character in internal ANTLR3 encoding (UTF32) +/// +static ANTLR3_UCHAR +antlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + UTF32 ch; + UTF32 ch2; + pANTLR3_UCHAR nextChar; + + // Find the input interface and where we are currently pointing to + // in the input stream + // + input = ((pANTLR3_INPUT_STREAM) (is->super)); + nextChar = input->nextChar; + + // If a positive offset then advance forward, else retreat + // + if (la >= 0) + { + while (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf ) + { + // Advance our copy of the input pointer + // + // Next char in Big Endian byte order + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // 
a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // We consumed one 16 bit character + // + nextChar += 2; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. + // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + // Note that we did not check for an invalid low surrogate here, or that fact that the + // lo surrogate was missing. We just picked out one 16 bit character unless the character + // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 
+ // + } + } + else + { + // We need to go backwards from our input point + // + while (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data ) + { + // Get the previous 16 bit character + // + ch = ((*nextChar - 2) << 8) + (*nextChar -1); + nextChar -= 2; + + // If we found a low surrogate then go back one more character if + // the hi surrogate is there + // + if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) + { + ch2 = ((*nextChar - 2) << 8) + (*nextChar -1); + if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) + { + // Yes, there is a high surrogate to match it so decrement one more and point to that + // + nextChar -=2; + } + } + } + } + + // Our local copy of nextChar is now pointing to either the correct character or end of file + // + // Input buffer size is always in bytes + // + if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + // Pick up the next 16 character (big endian byte order) + // + ch = ((*nextChar) << 8) + *(nextChar+1); + nextChar += 2; + + // If we have a surrogate pair then we need to consume + // a following valid LO surrogate. + // + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) + { + // If the 16 bits following the high surrogate are in the source buffer... + // + if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Next character is in big endian byte order + // + ch2 = ((*nextChar) << 8) + *(nextChar+1); + + // If it's a valid low surrogate, consume it + // + if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) + { + // Construct the UTF32 code point + // + ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + + (ch2 - UNI_SUR_LOW_START) + halfBase; + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it. 
+ // + } + // Note that we ignore a valid hi surrogate that has no lo surrogate to go with + // it because the buffer ended + // + } + } + return ch; +} + +/// \brief Common function to setup function interface for a UTF3 input stream. +/// +/// \param input Input stream context pointer +/// +void +antlr3UTF32SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian) +{ + // Build a string factory for this stream. This is a UTF32 string factory which is a standard + // part of the ANTLR3 string. The string factory is then passed through the whole chain of lexer->parser->tree->treeparser + // and so on. + // + input->strFactory = antlr3StringFactoryNew(input->encoding); + + // Generic API that does not care about endianess. + // + input->istream->index = antlr3UTF32Index; // Calculate current index in input stream, UTF16 based + input->substr = antlr3UTF32Substr; // Return a string from the input stream + input->istream->seek = antlr3UTF32Seek; // How to seek to a specific point in the stream + input->istream->consume = antlr3UTF32Consume; // Consume the next UTF32 character in the buffer + + // We must install different UTF32 LA routines according to whether the input + // is the same endianess as the machine we are executing upon or not. If it is not + // then we must install methods that can convert the endianess on the fly as they go + // + switch (machineBigEndian) + { + case ANTLR3_TRUE: + + // Machine is Big Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. 
+ // + if (inputBigEndian == ANTLR3_TRUE) + { + // Input is machine compatible + // + input->istream->_LA = antlr3UTF32LA; // Return the UTF32 character at offset n (1 based) + } + else + { + // Need to use methods that know that the input is little endian + // + input->istream->_LA = antlr3UTF32LALE; // Return the UTF32 character at offset n (1 based) + } + break; + + case ANTLR3_FALSE: + + // Machine is Little Endian, if the input is also then install the + // methods that do not access input by bytes and reverse them. + // Otherwise install endian aware methods. + // + if (inputBigEndian == ANTLR3_FALSE) + { + // Input is machine compatible + // + input->istream->_LA = antlr3UTF32LA; // Return the UTF32 character at offset n (1 based) + } + else + { + // Need to use methods that know that the input is Big Endian + // + input->istream->_LA = antlr3UTF32LABE; // Return the UTF32 character at offset n (1 based) + } + break; + } + + input->charByteSize = 4; // Size in bytes of characters in this stream. +} + +/** \brief Consume the next character in a UTF32 input stream + * + * \param input Input stream context pointer + */ +static void +antlr3UTF32Consume(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + // SizeBuf is always in bytes + // + if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + /* Indicate one more character in this line + */ + input->charPositionInLine++; + + if ((ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar)) == input->newlineChar) + { + /* Reset for start of a new line of input + */ + input->line++; + input->charPositionInLine = 0; + input->currentLine = (void *)(((pANTLR3_UINT32)input->nextChar) + 1); + } + + /* Increment to next character position + */ + input->nextChar = (void *)(((pANTLR3_UINT32)input->nextChar) + 1); + } +} + +/// \brief Calculate the current index in the output stream. 
+/// \param[in] input Input stream context pointer +/// +static ANTLR3_MARKER +antlr3UTF32Index(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + return (ANTLR3_MARKER)(input->nextChar); +} + +/// \brief Return a substring of the UTF16 input stream in +/// newly allocated memory. +/// +/// \param input Input stream context pointer +/// \param start Offset in input stream where the string starts +/// \param stop Offset in the input stream where the string ends. +/// +static pANTLR3_STRING +antlr3UTF32Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop) +{ + return input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, ((ANTLR3_UINT32_CAST(stop - start))/4) + 1); +} + +/// \brief Rewind the lexer input to the state specified by the supplied mark. +/// +/// \param[in] input Input stream context pointer +/// +/// \remark +/// Assumes UTF32 input stream. +/// +static void +antlr3UTF32Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) is->super); + + // If the requested seek point is less than the current + // input point, then we assume that we are resetting from a mark + // and do not need to scan, but can just set to there as rewind will + // reset line numbers and so on. 
+ // + if (seekPoint <= (ANTLR3_MARKER)(input->nextChar)) + { + input->nextChar = (void *)seekPoint; + } + else + { + // Call consume until we reach the asked for seek point or EOF + // + while (is->_LA(is, 1) != ANTLR3_CHARSTREAM_EOF && seekPoint < (ANTLR3_MARKER)input->nextChar) + { + is->consume(is); + } + } +} + +/** \brief Return the input element assuming a UTF32 input in natural machine byte order + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +static ANTLR3_UCHAR +antlr3UTF32LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + return (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1)); + } +} + +/** \brief Return the input element assuming a UTF32 input in little endian byte order + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +static ANTLR3_UCHAR +antlr3UTF32LALE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + ANTLR3_UCHAR c; + + c = (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1)); + + // Swap Endianess to Big Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + +/** \brief Return the input element assuming a UTF32 input in big endian byte order + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input 
stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + * \remark This is the same code as LE version but seprated in case there are better optimisations fo rendinan swap + */ +static ANTLR3_UCHAR +antlr3UTF32LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + ANTLR3_UCHAR c; + + c = (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1)); + + // Swap Endianess to Little Endian + // + return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); + } +} + + +/// \brief Common function to setup function interface for a UTF8 input stream. +/// +/// \param input Input stream context pointer +/// +void +antlr3UTF8SetupStream (pANTLR3_INPUT_STREAM input) +{ + // Build a string factory for this stream. This is a UTF16 string factory which is a standard + // part of the ANTLR3 string. The string factory is then passed through the whole chain of lexer->parser->tree->treeparser + // and so on. + // + input->strFactory = antlr3StringFactoryNew(input->encoding); + + // Generic API that does not care about endianess. + // + input->istream->consume = antlr3UTF8Consume; // Consume the next UTF32 character in the buffer + input->istream->_LA = antlr3UTF8LA; // Return the UTF32 character at offset n (1 based) + input->charByteSize = 0; // Size in bytes of characters in this stream. +} + +// ------------------------------------------------------ +// Following is from Unicode.org (see antlr3convertutf.c) +// + +/// Index into the table below with the first byte of a UTF-8 sequence to +/// get the number of trailing bytes that are supposed to follow it. +/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. 
The table is +/// left as-is for anyone who may want to do such conversion, which was +/// allowed in earlier algorithms. +/// +static const ANTLR3_UINT32 trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/// Magic values subtracted from a buffer value during UTF8 conversion. +/// This table contains as many values as there might be trailing bytes +/// in a UTF-8 sequence. +/// +static const UTF32 offsetsFromUTF8[6] = + { 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL + }; + +// End of Unicode.org tables +// ------------------------- + + +/** \brief Consume the next character in a UTF8 input stream + * + * \param input Input stream context pointer + */ +static void +antlr3UTF8Consume(pANTLR3_INT_STREAM is) +{ + pANTLR3_INPUT_STREAM input; + ANTLR3_UINT32 extraBytesToRead; + ANTLR3_UCHAR ch; + pANTLR3_UINT8 nextChar; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + nextChar = input->nextChar; + + if (nextChar < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Indicate one more character in this line + // + input->charPositionInLine++; + + // Are there more bytes needed to make up the whole thing? 
+ // + extraBytesToRead = trailingBytesForUTF8[*nextChar]; + + if (nextChar + extraBytesToRead >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + input->nextChar = (((pANTLR3_UINT8)input->data) + input->sizeBuf); + return; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) + // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so + // we allow it. + // + ch = 0; + switch (extraBytesToRead) { + case 5: ch += *nextChar++; ch <<= 6; + case 4: ch += *nextChar++; ch <<= 6; + case 3: ch += *nextChar++; ch <<= 6; + case 2: ch += *nextChar++; ch <<= 6; + case 1: ch += *nextChar++; ch <<= 6; + case 0: ch += *nextChar++; + } + + // Magically correct the input value + // + ch -= offsetsFromUTF8[extraBytesToRead]; + if (ch == input->newlineChar) + { + /* Reset for start of a new line of input + */ + input->line++; + input->charPositionInLine = 0; + input->currentLine = (void *)nextChar; + } + + // Update input pointer + // + input->nextChar = nextChar; + } +} +/** \brief Return the input element assuming a UTF8 input + * + * \param[in] input Input stream context pointer + * \param[in] la 1 based offset of next input stream element + * + * \return Next input character in internal ANTLR3 encoding (UTF32) + */ +static ANTLR3_UCHAR +antlr3UTF8LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + ANTLR3_UINT32 extraBytesToRead; + ANTLR3_UCHAR ch; + pANTLR3_UINT8 nextChar; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + nextChar = input->nextChar; + + // Do we need to traverse forwards or backwards? + // - LA(0) is treated as LA(1) and we assume that the nextChar is + // already positioned. + // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding + // - LA(-n) means we must traverse backwards n chracters + // + if (la > 1) { + + // Make sure that we have at least one character left before trying to + // loop through the buffer. 
+ // + if (nextChar < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + // Now traverse n-1 characters forward + // + while (--la > 0) + { + // Does the next character require trailing bytes? + // If so advance the pointer by that many bytes as well as advancing + // one position for what will be at least a single byte character. + // + nextChar += trailingBytesForUTF8[*nextChar] + 1; + + // Does that calculation take us past the byte length of the buffer? + // + if (nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + } + } + else + { + return ANTLR3_CHARSTREAM_EOF; + } + } + else + { + // LA is negative so we decrease the pointer by n character positions + // + while (nextChar > (pANTLR3_UINT8)input->data && la++ < 0) + { + // Traversing backwards in UTF8 means decermenting by one + // then continuing to decrement while ever a character pattern + // is flagged as being a trailing byte of an encoded code point. + // Trailing UTF8 bytes always start with 10 in binary. We assumne that + // the UTF8 is well formed and do not check boundary conditions + // + nextChar--; + while ((*nextChar & 0xC0) == 0x80) + { + nextChar--; + } + } + } + + // nextChar is now pointing at the UTF8 encoded character that we need to + // decode and return. + // + // Are there more bytes needed to make up the whole thing? 
+ // + extraBytesToRead = trailingBytesForUTF8[*nextChar]; + if (nextChar + extraBytesToRead >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + + // Cases deliberately fall through (see note A in antlrconvertutf.c) + // + ch = 0; + switch (extraBytesToRead) { + case 5: ch += *nextChar++; ch <<= 6; + case 4: ch += *nextChar++; ch <<= 6; + case 3: ch += *nextChar++; ch <<= 6; + case 2: ch += *nextChar++; ch <<= 6; + case 1: ch += *nextChar++; ch <<= 6; + case 0: ch += *nextChar++; + } + + // Magically correct the input value + // + ch -= offsetsFromUTF8[extraBytesToRead]; + + return ch; +} + +// EBCDIC to ASCII conversion table +// +// This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX +// translation and the character tables are published all over the interweb. +// +const ANTLR3_UCHAR e2a[256] = +{ + 0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f, + 0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97, + 0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, + 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, + 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, + 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, + 0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, + 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f, + 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, + 0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, + 0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, + 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, + 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4, + 0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae, + 0xa2, 0xa3, 
0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, + 0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7, + 0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, + 0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, + 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff, + 0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e +}; + +/// \brief Common function to setup function interface for a EBCDIC input stream. +/// +/// \param input Input stream context pointer +/// +void +antlr3EBCDICSetupStream (pANTLR3_INPUT_STREAM input) +{ + // EBCDIC streams can use the standard 8 bit string factory + // + input->strFactory = antlr3StringFactoryNew(input->encoding); + + // Generic API that does not care about endianess. + // + input->istream->_LA = antlr3EBCDICLA; // Return the UTF32 character at offset n (1 based) + input->charByteSize = 1; // Size in bytes of characters in this stream. +} + +/// \brief Return the input element assuming an 8 bit EBCDIC input +/// +/// \param[in] input Input stream context pointer +/// \param[in] la 1 based offset of next input stream element +/// +/// \return Next input character in internal ANTLR3 encoding (UTF32) after translation +/// from EBCDIC to ASCII +/// +static ANTLR3_UCHAR +antlr3EBCDICLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) +{ + pANTLR3_INPUT_STREAM input; + + input = ((pANTLR3_INPUT_STREAM) (is->super)); + + if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) + { + return ANTLR3_CHARSTREAM_EOF; + } + else + { + // Translate the required character via the constant conversion table + // + return e2a[(*((pANTLR3_UINT8)input->nextChar + la - 1))]; + } +}
\ No newline at end of file diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3intstream.c b/impl/antlr/libantlr3c-3.4/src/antlr3intstream.c new file mode 100644 index 0000000..b9d49d8 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3intstream.c @@ -0,0 +1,66 @@ +/// \file +/// Implementation of superclass elements of an ANTLR3 int stream. +/// The only methods required are an allocator and a destructor. +/// \addtogroup pANTLR3_INT_STREAM +/// @{ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3intstream.h> + +static void freeStream (pANTLR3_INT_STREAM stream); + +ANTLR3_API pANTLR3_INT_STREAM +antlr3IntStreamNew() +{ + pANTLR3_INT_STREAM stream; + + // Allocate memory + // + stream = (pANTLR3_INT_STREAM) ANTLR3_CALLOC(1, sizeof(ANTLR3_INT_STREAM)); + + if (stream == NULL) + { + return NULL; + } + + stream->free = freeStream; + + return stream; +} + +static void +freeStream (pANTLR3_INT_STREAM stream) +{ + ANTLR3_FREE(stream); +} + +/// @} +/// diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3lexer.c b/impl/antlr/libantlr3c-3.4/src/antlr3lexer.c new file mode 100644 index 0000000..d981ab7 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3lexer.c @@ -0,0 +1,904 @@ +/** \file + * + * Base implementation of an antlr 3 lexer. + * + * An ANTLR3 lexer implements a base recongizer, a token source and + * a lexer interface. It constructs a base recognizer with default + * functions, then overrides any of these that are parser specific (usual + * default implementation of base recognizer. + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3lexer.h> + +static void mTokens (pANTLR3_LEXER lexer); +static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); +static void pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); +static void popCharStream (pANTLR3_LEXER lexer); + +static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token); +static pANTLR3_COMMON_TOKEN emit (pANTLR3_LEXER lexer); +static ANTLR3_BOOLEAN matchs (pANTLR3_LEXER lexer, ANTLR3_UCHAR * string); +static ANTLR3_BOOLEAN matchc (pANTLR3_LEXER lexer, ANTLR3_UCHAR c); +static ANTLR3_BOOLEAN matchRange (pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high); +static void matchAny (pANTLR3_LEXER lexer); +static void recover (pANTLR3_LEXER lexer); +static ANTLR3_UINT32 getLine (pANTLR3_LEXER lexer); +static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer); +static ANTLR3_UINT32 getCharPositionInLine (pANTLR3_LEXER lexer); +static pANTLR3_STRING getText (pANTLR3_LEXER lexer); +static pANTLR3_COMMON_TOKEN nextToken (pANTLR3_TOKEN_SOURCE toksource); + +static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames); +static void reportError (pANTLR3_BASE_RECOGNIZER rec); +static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); +static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, + ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); + +static void reset (pANTLR3_BASE_RECOGNIZER rec); + +static void freeLexer (pANTLR3_LEXER lexer); + + +ANTLR3_API pANTLR3_LEXER +antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) +{ + pANTLR3_LEXER lexer; + pANTLR3_COMMON_TOKEN specialT; + + /* Allocate memory + */ + lexer = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER)); + + if (lexer == NULL) + { + return NULL; + } + + /* Now we need to create the base recognizer + */ + lexer->rec = 
antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state); + + if (lexer->rec == NULL) + { + lexer->free(lexer); + return NULL; + } + lexer->rec->super = lexer; + + lexer->rec->displayRecognitionError = displayRecognitionError; + lexer->rec->reportError = reportError; + lexer->rec->reset = reset; + lexer->rec->getCurrentInputSymbol = getCurrentInputSymbol; + lexer->rec->getMissingSymbol = getMissingSymbol; + + /* Now install the token source interface + */ + if (lexer->rec->state->tokSource == NULL) + { + lexer->rec->state->tokSource = (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE)); + + if (lexer->rec->state->tokSource == NULL) + { + lexer->rec->free(lexer->rec); + lexer->free(lexer); + + return NULL; + } + lexer->rec->state->tokSource->super = lexer; + + /* Install the default nextToken() method, which may be overridden + * by generated code, or by anything else in fact. + */ + lexer->rec->state->tokSource->nextToken = nextToken; + lexer->rec->state->tokSource->strFactory = NULL; + + lexer->rec->state->tokFactory = NULL; + } + + /* Install the lexer API + */ + lexer->setCharStream = setCharStream; + lexer->mTokens = (void (*)(void *))(mTokens); + lexer->setCharStream = setCharStream; + lexer->pushCharStream = pushCharStream; + lexer->popCharStream = popCharStream; + lexer->emit = emit; + lexer->emitNew = emitNew; + lexer->matchs = matchs; + lexer->matchc = matchc; + lexer->matchRange = matchRange; + lexer->matchAny = matchAny; + lexer->recover = recover; + lexer->getLine = getLine; + lexer->getCharIndex = getCharIndex; + lexer->getCharPositionInLine = getCharPositionInLine; + lexer->getText = getText; + lexer->free = freeLexer; + + /* Initialise the eof token + */ + specialT = &(lexer->rec->state->tokSource->eofToken); + antlr3SetTokenAPI (specialT); + specialT->setType (specialT, ANTLR3_TOKEN_EOF); + specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it + specialT->strFactory = NULL; + specialT->textState = 
ANTLR3_TEXT_NONE; + specialT->custom = NULL; + specialT->user1 = 0; + specialT->user2 = 0; + specialT->user3 = 0; + + // Initialize the skip token. + // + specialT = &(lexer->rec->state->tokSource->skipToken); + antlr3SetTokenAPI (specialT); + specialT->setType (specialT, ANTLR3_TOKEN_INVALID); + specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it + specialT->strFactory = NULL; + specialT->custom = NULL; + specialT->user1 = 0; + specialT->user2 = 0; + specialT->user3 = 0; + return lexer; +} + +static void +reset (pANTLR3_BASE_RECOGNIZER rec) +{ + pANTLR3_LEXER lexer; + + lexer = rec->super; + + lexer->rec->state->token = NULL; + lexer->rec->state->type = ANTLR3_TOKEN_INVALID; + lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; + lexer->rec->state->tokenStartCharIndex = -1; + lexer->rec->state->tokenStartCharPositionInLine = -1; + lexer->rec->state->tokenStartLine = -1; + + lexer->rec->state->text = NULL; + + // OK - that's all hunky dory, but we may well have had + // a token factory that needs a reset. Do that here + // + if (lexer->rec->state->tokFactory != NULL) + { + lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory); + } +} + +/// +/// \brief +/// Returns the next available token from the current input stream. +/// +/// \param toksource +/// Points to the implementation of a token source. The lexer is +/// addressed by the super structure pointer. +/// +/// \returns +/// The next token in the current input stream or the EOF token +/// if there are no more tokens. +/// +/// \remarks +/// Write remarks for nextToken here. 
+/// +/// \see nextToken +/// +ANTLR3_INLINE static pANTLR3_COMMON_TOKEN +nextTokenStr (pANTLR3_TOKEN_SOURCE toksource) +{ + pANTLR3_LEXER lexer; + pANTLR3_RECOGNIZER_SHARED_STATE state; + pANTLR3_INPUT_STREAM input; + pANTLR3_INT_STREAM istream; + + lexer = (pANTLR3_LEXER)(toksource->super); + state = lexer->rec->state; + input = lexer->input; + istream = input->istream; + + /// Loop until we get a non skipped token or EOF + /// + for (;;) + { + // Get rid of any previous token (token factory takes care of + // any de-allocation when this token is finally used up. + // + state->token = NULL; + state->error = ANTLR3_FALSE; // Start out without an exception + state->failed = ANTLR3_FALSE; + + // Now call the matching rules and see if we can generate a new token + // + for (;;) + { + // Record the start of the token in our input stream. + // + state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; + state->tokenStartCharIndex = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar)); + state->tokenStartCharPositionInLine = input->charPositionInLine; + state->tokenStartLine = input->line; + state->text = NULL; + state->custom = NULL; + state->user1 = 0; + state->user2 = 0; + state->user3 = 0; + + if (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF) + { + // Reached the end of the current stream, nothing more to do if this is + // the last in the stack. + // + pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken); + + teof->setStartIndex (teof, lexer->getCharIndex(lexer)); + teof->setStopIndex (teof, lexer->getCharIndex(lexer)); + teof->setLine (teof, lexer->getLine(lexer)); + teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it + return teof; + } + + state->token = NULL; + state->error = ANTLR3_FALSE; // Start out without an exception + state->failed = ANTLR3_FALSE; + + // Call the generated lexer, see if it can get a new token together. 
+ // + lexer->mTokens(lexer->ctx); + + if (state->error == ANTLR3_TRUE) + { + // Recognition exception, report it and try to recover. + // + state->failed = ANTLR3_TRUE; + lexer->rec->reportError(lexer->rec); + lexer->recover(lexer); + } + else + { + if (state->token == NULL) + { + // Emit the real token, which adds it in to the token stream basically + // + emit(lexer); + } + else if (state->token == &(toksource->skipToken)) + { + // A real token could have been generated, but "Computer say's naaaaah" and it + // it is just something we need to skip altogether. + // + continue; + } + + // Good token, not skipped, not EOF token + // + return state->token; + } + } + } +} + +/** + * \brief + * Default implementation of the nextToken() call for a lexer. + * + * \param toksource + * Points to the implementation of a token source. The lexer is + * addressed by the super structure pointer. + * + * \returns + * The next token in the current input stream or the EOF token + * if there are no more tokens in any input stream in the stack. + * + * Write detailed description for nextToken here. + * + * \remarks + * Write remarks for nextToken here. + * + * \see nextTokenStr + */ +static pANTLR3_COMMON_TOKEN +nextToken (pANTLR3_TOKEN_SOURCE toksource) +{ + pANTLR3_COMMON_TOKEN tok; + + // Find the next token in the current stream + // + tok = nextTokenStr(toksource); + + // If we got to the EOF token then switch to the previous + // input stream if there were any and just return the + // EOF if there are none. We must check the next token + // in any outstanding input stream we pop into the active + // role to see if it was sitting at EOF after PUSHing the + // stream we just consumed, otherwise we will return EOF + // on the reinstalled input stream, when in actual fact + // there might be more input streams to POP before the + // real EOF of the whole logical inptu stream. 
Hence we + // use a while loop here until we find somethign in the stream + // that isn't EOF or we reach the actual end of the last input + // stream on the stack. + // + while (tok->type == ANTLR3_TOKEN_EOF) + { + pANTLR3_LEXER lexer; + + lexer = (pANTLR3_LEXER)(toksource->super); + + if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) + { + // We have another input stream in the stack so we + // need to revert to it, then resume the loop to check + // it wasn't sitting at EOF itself. + // + lexer->popCharStream(lexer); + tok = nextTokenStr(toksource); + } + else + { + // There were no more streams on the input stack + // so this EOF is the 'real' logical EOF for + // the input stream. So we just exit the loop and + // return the EOF we have found. + // + break; + } + + } + + // return whatever token we have, which may be EOF + // + return tok; +} + +ANTLR3_API pANTLR3_LEXER +antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state) +{ + pANTLR3_LEXER lexer; + + // Create a basic lexer first + // + lexer = antlr3LexerNew(sizeHint, state); + + if (lexer != NULL) + { + // Install the input stream and reset the lexer + // + setCharStream(lexer, input); + } + + return lexer; +} + +static void mTokens (pANTLR3_LEXER lexer) +{ + if (lexer) // Fool compiler, avoid pragmas + { + ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n"); + } +} + +static void +reportError (pANTLR3_BASE_RECOGNIZER rec) +{ + // Indicate this recognizer had an error while processing. + // + rec->state->errorCount++; + + rec->displayRecognitionError(rec, rec->state->tokenNames); +} + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +/** Default lexer error handler (works for 8 bit streams only!!!) 
+ */ +static void +displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames) +{ + pANTLR3_LEXER lexer; + pANTLR3_EXCEPTION ex; + pANTLR3_STRING ftext; + + lexer = (pANTLR3_LEXER)(recognizer->super); + ex = lexer->rec->state->exception; + + // See if there is a 'filename' we can use + // + if (ex->name == NULL) + { + ANTLR3_FPRINTF(stderr, "-unknown source-("); + } + else + { + ftext = ex->streamName->to8(ex->streamName); + ANTLR3_FPRINTF(stderr, "%s(", ftext->chars); + } + + ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line); + ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ", + ex->type, + (pANTLR3_UINT8) (ex->message), + ex->charPositionInLine+1 + ); + { + ANTLR3_INT32 width; + + width = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index)); + + if (width >= 1) + { + if (isprint(ex->c)) + { + ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c); + } + else + { + ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c)); + } + ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index)); + } + else + { + ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n"); + ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ", + (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine), + (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine) + ); + width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); + + if (width >= 1) + { + ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 
20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); + } + else + { + ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n"); + } + } + } +} + +static void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) +{ + /* Install the input interface + */ + lexer->input = input; + + /* We may need a token factory for the lexer; we don't destroy any existing factory + * until the lexer is destroyed, as people may still be using the tokens it produced. + * TODO: Later I will provide a dup() method for a token so that it can extract itself + * out of the factory. + */ + if (lexer->rec->state->tokFactory == NULL) + { + lexer->rec->state->tokFactory = antlr3TokenFactoryNew(input); + } + else + { + /* When the input stream is being changed on the fly, rather than + * at the start of a new lexer, then we must tell the tokenFactory + * which input stream to adorn the tokens with so that when they + * are asked to provide their original input strings they can + * do so from the correct text stream. + */ + lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input); + } + + /* Propagate the string factory so that we preserve the encoding form from + * the input stream. + */ + if (lexer->rec->state->tokSource->strFactory == NULL) + { + lexer->rec->state->tokSource->strFactory = input->strFactory; + + // Set the newly acquired string factory up for our pre-made tokens + // for EOF. 
+ // + if (lexer->rec->state->tokSource->eofToken.strFactory == NULL) + { + lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory; + } + } + + /* This is a lexer, install the appropriate exception creator + */ + lexer->rec->exConstruct = antlr3RecognitionExceptionNew; + + /* Set the current token to nothing + */ + lexer->rec->state->token = NULL; + lexer->rec->state->text = NULL; + lexer->rec->state->tokenStartCharIndex = -1; + + /* Copy the name of the char stream to the token source + */ + lexer->rec->state->tokSource->fileName = input->fileName; +} + +/*! + * \brief + * Change to a new input stream, remembering the old one. + * + * \param lexer + * Pointer to the lexer instance to switch input streams for. + * + * \param input + * New input stream to install as the current one. + * + * Switches the current character input stream to + * a new one, saving the old one, which we will revert to at the end of this + * new one. + */ +static void +pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) +{ + // Do we need a new input stream stack? + // + if (lexer->rec->state->streams == NULL) + { + // This is the first call to stack a new + // stream and so we must create the stack first. + // + lexer->rec->state->streams = antlr3StackNew(0); + + if (lexer->rec->state->streams == NULL) + { + // Could not do this, we just fail to push it. + // TODO: Consider if this is what we want to do, but then + // any programmer can override this method to do something else. + return; + } + } + + // We have a stack, so we can save the current input stream + // into it. + // + lexer->input->istream->mark(lexer->input->istream); + lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL); + + // And now we can install this new one + // + lexer->setCharStream(lexer, input); +} + +/*! + * \brief + * Stops using the current input stream and reverts to any prior + * input stream on the stack. 
+ * + * \param lexer + * Description of parameter lexer. + * + * Pointer to a function that abandons the current input stream, whether it + * is empty or not and reverts to the previous stacked input stream. + * + * \remark + * The function fails silently if there are no prior input streams. + */ +static void +popCharStream (pANTLR3_LEXER lexer) +{ + pANTLR3_INPUT_STREAM input; + + // If we do not have a stream stack or we are already at the + // stack bottom, then do nothing. + // + if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) + { + // We just leave the current stream to its fate, we do not close + // it or anything as we do not know what the programmer intended + // for it. This method can always be overridden of course. + // So just find out what was currently saved on the stack and use + // that now, then pop it from the stack. + // + input = (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top); + lexer->rec->state->streams->pop(lexer->rec->state->streams); + + // Now install the stream as the current one. + // + lexer->setCharStream(lexer, input); + lexer->input->istream->rewindLast(lexer->input->istream); + } + return; +} + +static void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token) +{ + lexer->rec->state->token = token; /* Voila! */ +} + +static pANTLR3_COMMON_TOKEN +emit (pANTLR3_LEXER lexer) +{ + pANTLR3_COMMON_TOKEN token; + + /* We could check pointers to token factories and so on, but + * we are in code that we want to run as fast as possible + * so we are not checking any errors. So make sure you have installed an input stream before + * trying to emit a new token. 
+ */ + token = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory); + + /* Install the supplied information, and some other bits we already know + * get added automatically, such as the input stream it is associated with + * (though it can all be overridden of course) + */ + token->type = lexer->rec->state->type; + token->channel = lexer->rec->state->channel; + token->start = lexer->rec->state->tokenStartCharIndex; + token->stop = lexer->getCharIndex(lexer) - 1; + token->line = lexer->rec->state->tokenStartLine; + token->charPosition = lexer->rec->state->tokenStartCharPositionInLine; + + if (lexer->rec->state->text != NULL) + { + token->textState = ANTLR3_TEXT_STRING; + token->tokText.text = lexer->rec->state->text; + } + else + { + token->textState = ANTLR3_TEXT_NONE; + } + token->lineStart = lexer->input->currentLine; + token->user1 = lexer->rec->state->user1; + token->user2 = lexer->rec->state->user2; + token->user3 = lexer->rec->state->user3; + token->custom = lexer->rec->state->custom; + + lexer->rec->state->token = token; + + return token; +} + +/** + * Free the resources allocated by a lexer + */ +static void +freeLexer (pANTLR3_LEXER lexer) +{ + // This may have ben a delegate or delegator lexer, in which case the + // state may already have been freed (and set to NULL therefore) + // so we ignore the state if we don't have it. 
+ // + if (lexer->rec->state != NULL) + { + if (lexer->rec->state->streams != NULL) + { + lexer->rec->state->streams->free(lexer->rec->state->streams); + } + if (lexer->rec->state->tokFactory != NULL) + { + lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory); + lexer->rec->state->tokFactory = NULL; + } + if (lexer->rec->state->tokSource != NULL) + { + ANTLR3_FREE(lexer->rec->state->tokSource); + lexer->rec->state->tokSource = NULL; + } + } + if (lexer->rec != NULL) + { + lexer->rec->free(lexer->rec); + lexer->rec = NULL; + } + ANTLR3_FREE(lexer); +} + +/** Implementation of matchs for the lexer, overrides any + * base implementation in the base recognizer. + * + * \remark + * Note that the generated code lays down arrays of ints for constant + * strings so that they are int UTF32 form! + */ +static ANTLR3_BOOLEAN +matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string) +{ + while (*string != ANTLR3_STRING_TERMINATOR) + { + if (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string)) + { + if (lexer->rec->state->backtracking > 0) + { + lexer->rec->state->failed = ANTLR3_TRUE; + return ANTLR3_FALSE; + } + + lexer->rec->exConstruct(lexer->rec); + lexer->rec->state->failed = ANTLR3_TRUE; + + /* TODO: Implement exception creation more fully perhaps + */ + lexer->recover(lexer); + return ANTLR3_FALSE; + } + + /* Matched correctly, do consume it + */ + lexer->input->istream->consume(lexer->input->istream); + string++; + + /* Reset any failed indicator + */ + lexer->rec->state->failed = ANTLR3_FALSE; + } + + + return ANTLR3_TRUE; +} + +/** Implementation of matchc for the lexer, overrides any + * base implementation in the base recognizer. + * + * \remark + * Note that the generated code lays down arrays of ints for constant + * strings so that they are int UTF32 form! 
+ */ +static ANTLR3_BOOLEAN +matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c) +{ + if (lexer->input->istream->_LA(lexer->input->istream, 1) == c) + { + /* Matched correctly, do consume it + */ + lexer->input->istream->consume(lexer->input->istream); + + /* Reset any failed indicator + */ + lexer->rec->state->failed = ANTLR3_FALSE; + + return ANTLR3_TRUE; + } + + /* Failed to match, exception and recovery time. + */ + if (lexer->rec->state->backtracking > 0) + { + lexer->rec->state->failed = ANTLR3_TRUE; + return ANTLR3_FALSE; + } + + lexer->rec->exConstruct(lexer->rec); + + /* TODO: Implement exception creation more fully perhaps + */ + lexer->recover(lexer); + + return ANTLR3_FALSE; +} + +/** Implementation of match range for the lexer, overrides any + * base implementation in the base recognizer. + * + * \remark + * Note that the generated code lays down arrays of ints for constant + * strings so that they are int UTF32 form! + */ +static ANTLR3_BOOLEAN +matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high) +{ + ANTLR3_UCHAR c; + + /* What is in the stream at the moment? + */ + c = lexer->input->istream->_LA(lexer->input->istream, 1); + if ( c >= low && c <= high) + { + /* Matched correctly, consume it + */ + lexer->input->istream->consume(lexer->input->istream); + + /* Reset any failed indicator + */ + lexer->rec->state->failed = ANTLR3_FALSE; + + return ANTLR3_TRUE; + } + + /* Failed to match, execption and recovery time. 
+ */ + + if (lexer->rec->state->backtracking > 0) + { + lexer->rec->state->failed = ANTLR3_TRUE; + return ANTLR3_FALSE; + } + + lexer->rec->exConstruct(lexer->rec); + + /* TODO: Implement exception creation more fully + */ + lexer->recover(lexer); + + return ANTLR3_FALSE; +} + +static void +matchAny (pANTLR3_LEXER lexer) +{ + lexer->input->istream->consume(lexer->input->istream); +} + +static void +recover (pANTLR3_LEXER lexer) +{ + lexer->input->istream->consume(lexer->input->istream); +} + +static ANTLR3_UINT32 +getLine (pANTLR3_LEXER lexer) +{ + return lexer->input->getLine(lexer->input); +} + +static ANTLR3_UINT32 +getCharPositionInLine (pANTLR3_LEXER lexer) +{ + return lexer->input->charPositionInLine; +} + +static ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer) +{ + return lexer->input->istream->index(lexer->input->istream); +} + +static pANTLR3_STRING +getText (pANTLR3_LEXER lexer) +{ + if (lexer->rec->state->text) + { + return lexer->rec->state->text; + + } + return lexer->input->substr( + lexer->input, + lexer->rec->state->tokenStartCharIndex, + lexer->getCharIndex(lexer) - lexer->input->charByteSize + ); + +} + +static void * +getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) +{ + return NULL; +} + +static void * +getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, + ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) +{ + return NULL; +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3parser.c b/impl/antlr/libantlr3c-3.4/src/antlr3parser.c new file mode 100644 index 0000000..2b7e0e3 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3parser.c @@ -0,0 +1,193 @@ +/** \file + * Implementation of the base functionality for an ANTLR3 parser. + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3parser.h> + +/* Parser API + */ +static void setDebugListener (pANTLR3_PARSER parser, pANTLR3_DEBUG_EVENT_LISTENER dbg); +static void setTokenStream (pANTLR3_PARSER parser, pANTLR3_TOKEN_STREAM); +static pANTLR3_TOKEN_STREAM getTokenStream (pANTLR3_PARSER parser); +static void freeParser (pANTLR3_PARSER parser); + +ANTLR3_API pANTLR3_PARSER +antlr3ParserNewStreamDbg (ANTLR3_UINT32 sizeHint, pANTLR3_TOKEN_STREAM tstream, pANTLR3_DEBUG_EVENT_LISTENER dbg, pANTLR3_RECOGNIZER_SHARED_STATE state) +{ + pANTLR3_PARSER parser; + + parser = antlr3ParserNewStream(sizeHint, tstream, state); + + if (parser == NULL) + { + return NULL; + } + + parser->setDebugListener(parser, dbg); + + return parser; +} + +ANTLR3_API pANTLR3_PARSER +antlr3ParserNew (ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) +{ + pANTLR3_PARSER parser; + + /* Allocate memory + */ + parser = (pANTLR3_PARSER) ANTLR3_MALLOC(sizeof(ANTLR3_PARSER)); + + if (parser == NULL) + { + return NULL; + } + + /* Install a base parser + */ + parser->rec = antlr3BaseRecognizerNew(ANTLR3_TYPE_PARSER, sizeHint, state); + + if (parser->rec == NULL) + { + parser->free(parser); + return NULL; + } + + parser->rec->super = parser; + + /* Parser overrides + */ + parser->rec->exConstruct = antlr3MTExceptionNew; + + /* Install the API + */ + parser->setDebugListener = setDebugListener; + parser->setTokenStream = setTokenStream; + parser->getTokenStream = getTokenStream; + + parser->free = freeParser; + + return parser; +} + +ANTLR3_API pANTLR3_PARSER +antlr3ParserNewStream (ANTLR3_UINT32 sizeHint, pANTLR3_TOKEN_STREAM tstream, pANTLR3_RECOGNIZER_SHARED_STATE state) +{ + pANTLR3_PARSER parser; + + parser = antlr3ParserNew(sizeHint, state); + + if (parser == NULL) + { + return NULL; + } + + /* Everything seems to be hunky dory so we can install the + * token stream. 
+ */ + parser->setTokenStream(parser, tstream); + + return parser; +} + +static void +freeParser (pANTLR3_PARSER parser) +{ + if (parser->rec != NULL) + { + // This may have ben a delegate or delegator parser, in which case the + // state may already have been freed (and set to NULL therefore) + // so we ignore the state if we don't have it. + // + if (parser->rec->state != NULL) + { + if (parser->rec->state->following != NULL) + { + parser->rec->state->following->free(parser->rec->state->following); + parser->rec->state->following = NULL; + } + } + parser->rec->free(parser->rec); + parser->rec = NULL; + + } + ANTLR3_FREE(parser); +} + +static void +setDebugListener (pANTLR3_PARSER parser, pANTLR3_DEBUG_EVENT_LISTENER dbg) +{ + // Set the debug listener. There are no methods to override + // because currently the only ones that notify the debugger + // are error reporting and recovery. Hence we can afford to + // check and see if the debugger interface is null or not + // there. If there is ever an occasion for a performance + // sensitive function to use the debugger interface, then + // a replacement function for debug mode should be supplied + // and installed here. 
+ // + parser->rec->debugger = dbg; + + // If there was a tokenstream installed already + // then we need to tell it about the debug interface + // + if (parser->tstream != NULL) + { + parser->tstream->setDebugListener(parser->tstream, dbg); + } +} + +static void +setTokenStream (pANTLR3_PARSER parser, pANTLR3_TOKEN_STREAM tstream) +{ + parser->tstream = tstream; + parser->rec->reset(parser->rec); +} + +static pANTLR3_TOKEN_STREAM +getTokenStream (pANTLR3_PARSER parser) +{ + return parser->tstream; +} + + + + + + + + + + + + + + diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3rewritestreams.c b/impl/antlr/libantlr3c-3.4/src/antlr3rewritestreams.c new file mode 100644 index 0000000..9afb6e1 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3rewritestreams.c @@ -0,0 +1,844 @@ +/// \file +/// Implementation of token/tree streams that are used by the +/// tree re-write rules to manipulate the tokens and trees produced +/// by rules that are subject to rewrite directives. +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3rewritestreams.h> + +// Static support function forward declarations for the stream types. +// +static void reset (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static void add (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * el, void (ANTLR3_CDECL *freePtr)(void *)); +static void * next (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static pANTLR3_BASE_TREE nextTree (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static void * nextToken (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static void * _next (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static void * dupTok (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * el); +static void * dupTree (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * el); +static void * dupTreeNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * el); +static pANTLR3_BASE_TREE toTree (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * element); +static pANTLR3_BASE_TREE toTreeNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * element); +static ANTLR3_BOOLEAN hasNext (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static pANTLR3_BASE_TREE nextNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static pANTLR3_BASE_TREE nextNodeNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM 
stream); +static pANTLR3_BASE_TREE nextNodeToken (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static ANTLR3_UINT32 size (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static void * getDescription (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static void freeRS (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); +static void expungeRS (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream); + + +// Place a now unused rewrite stream back on the rewrite stream pool +// so we can reuse it if we need to. +// +static void +freeRS (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + // Before placing the stream back in the pool, we + // need to clear any vector it has. This is so any + // free pointers that are associated with the + // entires are called. + // + if (stream->elements != NULL) + { + // Factory generated vectors can be returned to the + // vector factory for later reuse. + // + if (stream->elements->factoryMade == ANTLR3_TRUE) + { + pANTLR3_VECTOR_FACTORY factory = ((pANTLR3_COMMON_TREE_ADAPTOR)(stream->adaptor->super))->arboretum->vFactory; + factory->returnVector(factory, stream->elements); + + stream->elements = NULL; + } + else + { + // Other vectors we clear and allow to be reused if they come off the + // rewrite stream free stack and are reused. + // + stream->elements->clear(stream->elements); + stream->freeElements = ANTLR3_TRUE; + } + } + else + { + stream->freeElements = ANTLR3_FALSE; // Just in case + } + + // Add the stream into the recognizer stream stack vector + // adding the stream memory free routine so that + // it is thrown away when the stack vector is destroyed + // + stream->rec->state->rStreams->add(stream->rec->state->rStreams, stream, (void(*)(void *))expungeRS); +} + +/** Do special nilNode reuse detection for node streams. + */ +static void +freeNodeRS(pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + pANTLR3_BASE_TREE tree; + + // Before placing the stream back in the pool, we + // need to clear any vector it has. 
This is so any + // free pointers that are associated with the + // entires are called. However, if this particular function is called + // then we know that the entries in the stream are definately + // tree nodes. Hence we check to see if any of them were nilNodes as + // if they were, we can reuse them. + // + if (stream->elements != NULL) + { + // We have some elements to traverse + // + ANTLR3_UINT32 i; + + for (i = 1; i<= stream->elements->count; i++) + { + tree = (pANTLR3_BASE_TREE)(stream->elements->elements[i-1].element); + if (tree != NULL && tree->isNilNode(tree)) + { + // Had to remove this for now, check is not comprehensive enough + // tree->reuse(tree); + } + + } + // Factory generated vectors can be returned to the + // vector factory for later reuse. + // + if (stream->elements->factoryMade == ANTLR3_TRUE) + { + pANTLR3_VECTOR_FACTORY factory = ((pANTLR3_COMMON_TREE_ADAPTOR)(stream->adaptor->super))->arboretum->vFactory; + factory->returnVector(factory, stream->elements); + + stream->elements = NULL; + } + else + { + stream->elements->clear(stream->elements); + stream->freeElements = ANTLR3_TRUE; + } + } + else + { + if (stream->singleElement != NULL) + { + tree = (pANTLR3_BASE_TREE)(stream->singleElement); + if (tree->isNilNode(tree)) + { + // Had to remove this for now, check is not comprehensive enough + // tree->reuse(tree); + } + } + stream->singleElement = NULL; + stream->freeElements = ANTLR3_FALSE; // Just in case + } + + // Add the stream into the recognizer stream stack vector + // adding the stream memory free routine so that + // it is thrown away when the stack vector is destroyed + // + stream->rec->state->rStreams->add(stream->rec->state->rStreams, stream, (void(*)(void *))expungeRS); +} +static void +expungeRS(pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + + if (stream->freeElements == ANTLR3_TRUE && stream->elements != NULL) + { + stream->elements->free(stream->elements); + } + ANTLR3_FREE(stream); +} + +// Functions for creating 
streams +// +static pANTLR3_REWRITE_RULE_ELEMENT_STREAM +antlr3RewriteRuleElementStreamNewAE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description) +{ + pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream; + + // First - do we already have a rewrite stream that was returned + // to the pool? If we do, then we will just reuse it by resetting + // the generic interface. + // + if (rec->state->rStreams->count > 0) + { + // Remove the entry from the vector. We do not + // cause it to be freed by using remove. + // + stream = rec->state->rStreams->remove(rec->state->rStreams, rec->state->rStreams->count - 1); + + // We found a stream we can reuse. + // If the stream had a vector, then it will have been cleared + // when the freeRS was called that put it in this stack + // + } + else + { + // Ok, we need to allocate a new one as there were none on the stack. + // First job is to create the memory we need. + // + stream = (pANTLR3_REWRITE_RULE_ELEMENT_STREAM) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_REWRITE_RULE_ELEMENT_STREAM))); + + if (stream == NULL) + { + return NULL; + } + stream->elements = NULL; + stream->freeElements = ANTLR3_FALSE; + } + + // Populate the generic interface + // + stream->rec = rec; + stream->reset = reset; + stream->add = add; + stream->next = next; + stream->nextTree = nextTree; + stream->nextNode = nextNode; + stream->nextToken = nextToken; + stream->_next = _next; + stream->hasNext = hasNext; + stream->size = size; + stream->getDescription = getDescription; + stream->toTree = toTree; + stream->free = freeRS; + stream->singleElement = NULL; + + // Reset the stream to empty. 
+ // + + stream->cursor = 0; + stream->dirty = ANTLR3_FALSE; + + // Install the description + // + stream->elementDescription = description; + + // Install the adaptor + // + stream->adaptor = adaptor; + + return stream; +} + +static pANTLR3_REWRITE_RULE_ELEMENT_STREAM +antlr3RewriteRuleElementStreamNewAEE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, void * oneElement) +{ + pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAE(adaptor, rec, description); + + if (stream == NULL) + { + return NULL; + } + + // Stream seems good so we need to add the supplied element + // + if (oneElement != NULL) + { + stream->add(stream, oneElement, NULL); + } + return stream; +} + +static pANTLR3_REWRITE_RULE_ELEMENT_STREAM +antlr3RewriteRuleElementStreamNewAEV(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, pANTLR3_VECTOR vector) +{ + pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAE(adaptor, rec, description); + + if (stream == NULL) + { + return stream; + } + + // Stream seems good so we need to install the vector we were + // given. We assume that someone else is going to free the + // vector. + // + if (stream->elements != NULL && stream->elements->factoryMade == ANTLR3_FALSE && stream->freeElements == ANTLR3_TRUE ) + { + stream->elements->free(stream->elements); + } + stream->elements = vector; + stream->freeElements = ANTLR3_FALSE; + return stream; +} + +// Token rewrite stream ... +// +ANTLR3_API pANTLR3_REWRITE_RULE_TOKEN_STREAM +antlr3RewriteRuleTOKENStreamNewAE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description) +{ + pANTLR3_REWRITE_RULE_TOKEN_STREAM stream; + + // First job is to create the memory we need. 
+ // + stream = antlr3RewriteRuleElementStreamNewAE(adaptor, rec, description); + + if (stream == NULL) + { + return stream; + } + + // Install the token based overrides + // + stream->dup = dupTok; + stream->nextNode = nextNodeToken; + + // No nextNode implementation for a token rewrite stream + // + return stream; +} + +ANTLR3_API pANTLR3_REWRITE_RULE_TOKEN_STREAM +antlr3RewriteRuleTOKENStreamNewAEE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, void * oneElement) +{ + pANTLR3_REWRITE_RULE_TOKEN_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAEE(adaptor, rec, description, oneElement); + + // Install the token based overrides + // + stream->dup = dupTok; + stream->nextNode = nextNodeToken; + + // No nextNode implementation for a token rewrite stream + // + return stream; +} + +ANTLR3_API pANTLR3_REWRITE_RULE_TOKEN_STREAM +antlr3RewriteRuleTOKENStreamNewAEV(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, pANTLR3_VECTOR vector) +{ + pANTLR3_REWRITE_RULE_TOKEN_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAEV(adaptor, rec, description, vector); + + // Install the token based overrides + // + stream->dup = dupTok; + stream->nextNode = nextNodeToken; + + // No nextNode implementation for a token rewrite stream + // + return stream; +} + +// Subtree rewrite stream +// +ANTLR3_API pANTLR3_REWRITE_RULE_SUBTREE_STREAM +antlr3RewriteRuleSubtreeStreamNewAE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description) +{ + pANTLR3_REWRITE_RULE_SUBTREE_STREAM stream; + + // First job is to create the memory we need. 
+ // + stream = antlr3RewriteRuleElementStreamNewAE(adaptor, rec, description); + + if (stream == NULL) + { + return stream; + } + + // Install the subtree based overrides + // + stream->dup = dupTree; + stream->nextNode = nextNode; + stream->free = freeNodeRS; + return stream; + +} +ANTLR3_API pANTLR3_REWRITE_RULE_SUBTREE_STREAM +antlr3RewriteRuleSubtreeStreamNewAEE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, void * oneElement) +{ + pANTLR3_REWRITE_RULE_SUBTREE_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAEE(adaptor, rec, description, oneElement); + + if (stream == NULL) + { + return stream; + } + + // Install the subtree based overrides + // + stream->dup = dupTree; + stream->nextNode = nextNode; + stream->free = freeNodeRS; + + return stream; +} + +ANTLR3_API pANTLR3_REWRITE_RULE_SUBTREE_STREAM +antlr3RewriteRuleSubtreeStreamNewAEV(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, pANTLR3_VECTOR vector) +{ + pANTLR3_REWRITE_RULE_SUBTREE_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAEV(adaptor, rec, description, vector); + + if (stream == NULL) + { + return NULL; + } + + // Install the subtree based overrides + // + stream->dup = dupTree; + stream->nextNode = nextNode; + stream->free = freeNodeRS; + + return stream; +} +// Node rewrite stream ... +// +ANTLR3_API pANTLR3_REWRITE_RULE_NODE_STREAM +antlr3RewriteRuleNODEStreamNewAE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description) +{ + pANTLR3_REWRITE_RULE_NODE_STREAM stream; + + // First job is to create the memory we need. 
+ // + stream = antlr3RewriteRuleElementStreamNewAE(adaptor, rec, description); + + if (stream == NULL) + { + return stream; + } + + // Install the node based overrides + // + stream->dup = dupTreeNode; + stream->toTree = toTreeNode; + stream->nextNode = nextNodeNode; + stream->free = freeNodeRS; + + return stream; +} + +ANTLR3_API pANTLR3_REWRITE_RULE_NODE_STREAM +antlr3RewriteRuleNODEStreamNewAEE(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, void * oneElement) +{ + pANTLR3_REWRITE_RULE_NODE_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAEE(adaptor, rec, description, oneElement); + + // Install the node based overrides + // + stream->dup = dupTreeNode; + stream->toTree = toTreeNode; + stream->nextNode = nextNodeNode; + stream->free = freeNodeRS; + + return stream; +} + +ANTLR3_API pANTLR3_REWRITE_RULE_NODE_STREAM +antlr3RewriteRuleNODEStreamNewAEV(pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 description, pANTLR3_VECTOR vector) +{ + pANTLR3_REWRITE_RULE_NODE_STREAM stream; + + // First job is to create the memory we need. + // + stream = antlr3RewriteRuleElementStreamNewAEV(adaptor, rec, description, vector); + + // Install the Node based overrides + // + stream->dup = dupTreeNode; + stream->toTree = toTreeNode; + stream->nextNode = nextNodeNode; + stream->free = freeNodeRS; + + return stream; +} + +//---------------------------------------------------------------------- +// Static support functions + +/// Reset the condition of this stream so that it appears we have +/// not consumed any of its elements. Elements themselves are untouched. 
+/// +static void +reset (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + stream->dirty = ANTLR3_TRUE; + stream->cursor = 0; +} + +// Add a new pANTLR3_BASE_TREE to this stream +// +static void +add (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * el, void (ANTLR3_CDECL *freePtr)(void *)) +{ + if (el== NULL) + { + return; + } + // As we may be reusing a stream, we may already have allocated + // a rewrite stream vector. If we have then is will be empty if + // we have either zero or just one element in the rewrite stream + // + if (stream->elements != NULL && stream->elements->count > 0) + { + // We already have >1 entries in the stream. So we can just add this new element to the existing + // collection. + // + stream->elements->add(stream->elements, el, freePtr); + return; + } + if (stream->singleElement == NULL) + { + stream->singleElement = el; + return; + } + + // If we got here then we had only the one element so far + // and we must now create a vector to hold a collection of them + // + if (stream->elements == NULL) + { + pANTLR3_VECTOR_FACTORY factory = ((pANTLR3_COMMON_TREE_ADAPTOR)(stream->adaptor->super))->arboretum->vFactory; + + + stream->elements = factory->newVector(factory); + stream->freeElements = ANTLR3_TRUE; // We 'ummed it, so we play it son. + } + + stream->elements->add (stream->elements, stream->singleElement, freePtr); + stream->elements->add (stream->elements, el, freePtr); + stream->singleElement = NULL; + + return; +} + +/// Return the next element in the stream. If out of elements, throw +/// an exception unless size()==1. If size is 1, then return elements[0]. +/// Return a duplicate node/subtree if stream is out of elements and +/// size==1. If we've already used the element, dup (dirty bit set). 
+/// +static pANTLR3_BASE_TREE +nextTree(pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + ANTLR3_UINT32 n; + void * el; + + n = stream->size(stream); + + if ( stream->dirty || (stream->cursor >=n && n==1) ) + { + // if out of elements and size is 1, dup + // + el = stream->_next(stream); + return stream->dup(stream, el); + } + + // test size above then fetch + // + el = stream->_next(stream); + return el; +} + +/// Return the next element for a caller that wants just the token +/// +static void * +nextToken (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + return stream->_next(stream); +} + +/// Return the next element in the stream. If out of elements, throw +/// an exception unless size()==1. If size is 1, then return elements[0]. +/// +static void * +next (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + ANTLR3_UINT32 s; + + s = stream->size(stream); + if (stream->cursor >= s && s == 1) + { + pANTLR3_BASE_TREE el; + + el = stream->_next(stream); + + return stream->dup(stream, el); + } + + return stream->_next(stream); +} + +/// Do the work of getting the next element, making sure that it's +/// a tree node or subtree. Deal with the optimization of single- +/// element list versus list of size > 1. Throw an exception (or something similar) +/// if the stream is empty or we're out of elements and size>1. +/// You can override in a 'subclass' if necessary. +/// +static void * +_next (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + ANTLR3_UINT32 n; + pANTLR3_BASE_TREE t; + + n = stream->size(stream); + + if (n == 0) + { + // This means that the stream is empty + // + return NULL; // Caller must cope with this + } + + // Traversed all the available elements already? 
+ // + if (stream->cursor >= n) + { + if (n == 1) + { + // Special case when size is single element, it will just dup a lot + // + return stream->toTree(stream, stream->singleElement); + } + + // Out of elements and the size is not 1, so we cannot assume + // that we just duplicate the entry n times (such as ID ent+ -> ^(ID ent)+) + // This means we ran out of elements earlier than was expected. + // + return NULL; // Caller must cope with this + } + + // Elements available either for duping or just available + // + if (stream->singleElement != NULL) + { + stream->cursor++; // Cursor advances even for single element as this tells us to dup() + return stream->toTree(stream, stream->singleElement); + } + + // More than just a single element so we extract it from the + // vector. + // + t = stream->toTree(stream, stream->elements->get(stream->elements, stream->cursor)); + stream->cursor++; + return t; +} + +#ifdef ANTLR3_WINDOWS +#pragma warning(push) +#pragma warning(disable : 4100) +#endif +/// When constructing trees, sometimes we need to dup a token or AST +/// subtree. Dup'ing a token means just creating another AST node +/// around it. For trees, you must call the adaptor.dupTree(). +/// +static void * +dupTok (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * el) +{ + ANTLR3_FPRINTF(stderr, "dup() cannot be called on a token rewrite stream!!"); + return NULL; +} +#ifdef ANTLR3_WINDOWS +#pragma warning(pop) +#endif + +/// When constructing trees, sometimes we need to dup a token or AST +/// subtree. Dup'ing a token means just creating another AST node +/// around it. For trees, you must call the adaptor.dupTree(). +/// +static void * +dupTree (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * element) +{ + return stream->adaptor->dupNode(stream->adaptor, (pANTLR3_BASE_TREE)element); +} + +#ifdef ANTLR3_WINDOWS +#pragma warning(push) +#pragma warning(disable : 4100) +#endif +/// When constructing trees, sometimes we need to dup a token or AST +/// subtree. 
Dup'ing a token means just creating another AST node +/// around it. For trees, you must call the adaptor.dupTree(). +/// +static void * +dupTreeNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * element) +{ + ANTLR3_FPRINTF(stderr, "dup() cannot be called on a node rewrite stream!!!"); + return NULL; +} + + +/// We don;t explicitly convert to a tree unless the call goes to +/// nextTree, which means rewrites are heterogeneous +/// +static pANTLR3_BASE_TREE +toTree (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * element) +{ + return (pANTLR3_BASE_TREE)element; +} +#ifdef ANTLR3_WINDOWS +#pragma warning(pop) +#endif + +/// Ensure stream emits trees; tokens must be converted to AST nodes. +/// AST nodes can be passed through unmolested. +/// +#ifdef ANTLR3_WINDOWS +#pragma warning(push) +#pragma warning(disable : 4100) +#endif + +static pANTLR3_BASE_TREE +toTreeNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * element) +{ + return stream->adaptor->dupNode(stream->adaptor, (pANTLR3_BASE_TREE)element); +} + +#ifdef ANTLR3_WINDOWS +#pragma warning(pop) +#endif + +/// Returns ANTLR3_TRUE if there is a next element available +/// +static ANTLR3_BOOLEAN +hasNext (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + if ( (stream->singleElement != NULL && stream->cursor < 1) + || (stream->elements != NULL && stream->cursor < stream->elements->size(stream->elements))) + { + return ANTLR3_TRUE; + } + else + { + return ANTLR3_FALSE; + } +} + +/// Get the next token from the list and create a node for it +/// This is the implementation for token streams. +/// +static pANTLR3_BASE_TREE +nextNodeToken(pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + return stream->adaptor->create(stream->adaptor, stream->_next(stream)); +} + +static pANTLR3_BASE_TREE +nextNodeNode(pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + return stream->_next(stream); +} + +/// Treat next element as a single node even if it's a subtree. 
+/// This is used instead of next() when the result has to be a +/// tree root node. Also prevents us from duplicating recently-added +/// children; e.g., ^(type ID)+ adds ID to type and then 2nd iteration +/// must dup the type node, but ID has been added. +/// +/// Referencing to a rule result twice is ok; dup entire tree as +/// we can't be adding trees; e.g., expr expr. +/// +static pANTLR3_BASE_TREE +nextNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + + ANTLR3_UINT32 n; + pANTLR3_BASE_TREE el = stream->_next(stream); + + n = stream->size(stream); + if (stream->dirty == ANTLR3_TRUE || (stream->cursor > n && n == 1)) + { + // We are out of elements and the size is 1, which means we just + // dup the node that we have + // + return stream->adaptor->dupNode(stream->adaptor, el); + } + + // We were not out of nodes, so the one we received is the one to return + // + return el; +} + +/// Number of elements available in the stream +/// +static ANTLR3_UINT32 +size (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + ANTLR3_UINT32 n = 0; + + /// Should be a count of one if singleElement is set. I copied this + /// logic from the java implementation, which I suspect is just guarding + /// against someone setting singleElement and forgetting to NULL it out + /// + if (stream->singleElement != NULL) + { + n = 1; + } + else + { + if (stream->elements != NULL) + { + return (ANTLR3_UINT32)(stream->elements->count); + } + } + return n; +} + +/// Returns the description string if there is one available (check for NULL). 
+/// +static void * +getDescription (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream) +{ + if (stream->elementDescription == NULL) + { + stream->elementDescription = "<unknown source>"; + } + + return stream->elementDescription; +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3string.c b/impl/antlr/libantlr3c-3.4/src/antlr3string.c new file mode 100644 index 0000000..b29c020 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3string.c @@ -0,0 +1,1402 @@ +/** \file + * Implementation of the ANTLR3 string and string factory classes + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <antlr3string.h> + +/* Factory API + */ +static pANTLR3_STRING newRaw8 (pANTLR3_STRING_FACTORY factory); +static pANTLR3_STRING newRawUTF16 (pANTLR3_STRING_FACTORY factory); +static pANTLR3_STRING newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size); +static pANTLR3_STRING newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size); +static pANTLR3_STRING newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); +static pANTLR3_STRING newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); +static pANTLR3_STRING newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size); +static pANTLR3_STRING newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); +static pANTLR3_STRING newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); +static pANTLR3_STRING newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string); +static void destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); +static pANTLR3_STRING printable8 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); +static pANTLR3_STRING printableUTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string); +static void closeFactory(pANTLR3_STRING_FACTORY factory); + +/* String API + */ +static pANTLR3_UINT8 set8 (pANTLR3_STRING string, const char * chars); +static pANTLR3_UINT8 setUTF16_8 (pANTLR3_STRING string, const char * chars); +static 
pANTLR3_UINT8 setUTF16_UTF16 (pANTLR3_STRING string, const char * chars); +static pANTLR3_UINT8 append8 (pANTLR3_STRING string, const char * newbit); +static pANTLR3_UINT8 appendUTF16_8 (pANTLR3_STRING string, const char * newbit); +static pANTLR3_UINT8 appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit); +static pANTLR3_UINT8 insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); +static pANTLR3_UINT8 insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); +static pANTLR3_UINT8 insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit); + +static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars); +static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit); +static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit); + +static pANTLR3_UINT8 addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c); +static pANTLR3_UINT8 addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c); +static pANTLR3_UINT8 addi8 (pANTLR3_STRING string, ANTLR3_INT32 i); +static pANTLR3_UINT8 addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i); +static pANTLR3_UINT8 inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i); +static pANTLR3_UINT8 insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i); + +static ANTLR3_UINT32 compare8 (pANTLR3_STRING string, const char * compStr); +static ANTLR3_UINT32 compareUTF16_8 (pANTLR3_STRING string, const char * compStr); +static ANTLR3_UINT32 compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr); +static ANTLR3_UINT32 compareS (pANTLR3_STRING string, pANTLR3_STRING compStr); +static ANTLR3_UCHAR charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset); +static ANTLR3_UCHAR charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset); +static pANTLR3_STRING subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex); +static pANTLR3_STRING subStringUTF16 
(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex); +static ANTLR3_INT32 toInt32_8 (pANTLR3_STRING string); +static ANTLR3_INT32 toInt32_UTF16 (pANTLR3_STRING string); +static pANTLR3_STRING to8_8 (pANTLR3_STRING string); +static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string); +static pANTLR3_STRING toUTF8_8 (pANTLR3_STRING string); +static pANTLR3_STRING toUTF8_UTF16 (pANTLR3_STRING string); + +/* Local helpers + */ +static void stringInit8 (pANTLR3_STRING string); +static void stringInitUTF16 (pANTLR3_STRING string); +static void ANTLR3_CDECL stringFree (pANTLR3_STRING string); + +ANTLR3_API pANTLR3_STRING_FACTORY +antlr3StringFactoryNew(ANTLR3_UINT32 encoding) +{ + pANTLR3_STRING_FACTORY factory; + + /* Allocate memory + */ + factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY)); + + if (factory == NULL) + { + return NULL; + } + + /* Now we make a new list to track the strings. + */ + factory->strings = antlr3VectorNew(0); + factory->index = 0; + + if (factory->strings == NULL) + { + ANTLR3_FREE(factory); + return NULL; + } + + // Install the API + // + // TODO: These encodings need equivalent functions to + // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff. + // The STRING stuff was intended as a quick and dirty hack for people that did not + // want to worry about memory and performance very much, but nobody ever reads the + // notes or comments or uses the email list search. I want to discourage using these + // interfaces as it is much more efficient to use the pointers within the tokens + // directly, so I am not implementing the string stuff for the newer encodings. + // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they + // will not be useful beyond returning the text. 
+ // + switch(encoding) + { + case ANTLR3_ENC_UTF32: + break; + + case ANTLR3_ENC_UTF32BE: + break; + + case ANTLR3_ENC_UTF32LE: + break; + + case ANTLR3_ENC_UTF16BE: + case ANTLR3_ENC_UTF16LE: + case ANTLR3_ENC_UTF16: + + factory->newRaw = newRawUTF16; + factory->newSize = newSizeUTF16; + factory->newPtr = newPtrUTF16_UTF16; + factory->newPtr8 = newPtrUTF16_8; + factory->newStr = newStrUTF16_UTF16; + factory->newStr8 = newStrUTF16_8; + factory->printable = printableUTF16; + factory->destroy = destroy; + factory->close = closeFactory; + break; + + case ANTLR3_ENC_UTF8: + case ANTLR3_ENC_EBCDIC: + case ANTLR3_ENC_8BIT: + default: + + factory->newRaw = newRaw8; + factory->newSize = newSize8; + factory->newPtr = newPtr8; + factory->newPtr8 = newPtr8; + factory->newStr = newStr8; + factory->newStr8 = newStr8; + factory->printable = printable8; + factory->destroy = destroy; + factory->close = closeFactory; + break; + } + return factory; +} + + +/** + * + * \param factory + * \return + */ +static pANTLR3_STRING +newRaw8 (pANTLR3_STRING_FACTORY factory) +{ + pANTLR3_STRING string; + + string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING)); + + if (string == NULL) + { + return NULL; + } + + /* Structure is allocated, now fill in the API etc. + */ + stringInit8(string); + string->factory = factory; + + /* Add the string into the allocated list + */ + factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE); + string->index = factory->index++; + + return string; +} +/** + * + * \param factory + * \return + */ +static pANTLR3_STRING +newRawUTF16 (pANTLR3_STRING_FACTORY factory) +{ + pANTLR3_STRING string; + + string = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING)); + + if (string == NULL) + { + return NULL; + } + + /* Structure is allocated, now fill in the API etc. 
+ */ + stringInitUTF16(string); + string->factory = factory; + + /* Add the string into the allocated list + */ + factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE); + string->index = factory->index++; + + return string; +} +static +void ANTLR3_CDECL stringFree (pANTLR3_STRING string) +{ + /* First free the string itself if there was anything in it + */ + if (string->chars) + { + ANTLR3_FREE(string->chars); + } + + /* Now free the space for this string + */ + ANTLR3_FREE(string); + + return; +} +/** + * + * \param string + * \return + */ +static void +stringInit8 (pANTLR3_STRING string) +{ + string->len = 0; + string->size = 0; + string->chars = NULL; + string->encoding = ANTLR3_ENC_8BIT ; + + /* API for 8 bit strings*/ + + string->set = set8; + string->set8 = set8; + string->append = append8; + string->append8 = append8; + string->insert = insert8; + string->insert8 = insert8; + string->addi = addi8; + string->inserti = inserti8; + string->addc = addc8; + string->charAt = charAt8; + string->compare = compare8; + string->compare8 = compare8; + string->subString = subString8; + string->toInt32 = toInt32_8; + string->to8 = to8_8; + string->toUTF8 = toUTF8_8; + string->compareS = compareS; + string->setS = setS; + string->appendS = appendS; + string->insertS = insertS; + +} +/** + * + * \param string + * \return + */ +static void +stringInitUTF16 (pANTLR3_STRING string) +{ + string->len = 0; + string->size = 0; + string->chars = NULL; + string->encoding = ANTLR3_ENC_8BIT; + + /* API for UTF16 strings */ + + string->set = setUTF16_UTF16; + string->set8 = setUTF16_8; + string->append = appendUTF16_UTF16; + string->append8 = appendUTF16_8; + string->insert = insertUTF16_UTF16; + string->insert8 = insertUTF16_8; + string->addi = addiUTF16; + string->inserti = insertiUTF16; + string->addc = addcUTF16; + string->charAt = charAtUTF16; + string->compare = compareUTF16_UTF16; + string->compare8 = 
compareUTF16_8; + string->subString = subStringUTF16; + string->toInt32 = toInt32_UTF16; + string->to8 = to8_UTF16; + string->toUTF8 = toUTF8_UTF16; + + string->compareS = compareS; + string->setS = setS; + string->appendS = appendS; + string->insertS = insertS; +} +/** + * + * \param string + * \return + * TODO: Implement UTF-8 + */ +static void +stringInitUTF8 (pANTLR3_STRING string) +{ + string->len = 0; + string->size = 0; + string->chars = NULL; + + /* API */ + +} + +// Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself +// a memcpy as we make no assumptions about the 8 bit encoding. +// +static pANTLR3_STRING +toUTF8_8 (pANTLR3_STRING string) +{ + return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len); +} + +// Convert a UTF16 string into a UTF8 representation using the Unicode.org +// supplied C algorithms, which are now contained within the ANTLR3 C runtime +// as permitted by the Unicode license (within the source code antlr3convertutf.c/.h +// UCS2 has the same encoding as UTF16 so we can use UTF16 converter. +// +static pANTLR3_STRING +toUTF8_UTF16 (pANTLR3_STRING string) +{ + + UTF8 * outputEnd; + UTF16 * inputEnd; + pANTLR3_STRING utf8String; + + ConversionResult cResult; + + // Allocate the output buffer, which needs to accommodate potentially + // 3X (in bytes) the input size (in chars). 
+ // + utf8String = string->factory->newStr8(string->factory, (pANTLR3_UINT8)""); + + if (utf8String != NULL) + { + // Free existing allocation + // + ANTLR3_FREE(utf8String->chars); + + // Reallocate according to maximum expected size + // + utf8String->size = string->len *3; + utf8String->chars = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1); + + if (utf8String->chars != NULL) + { + inputEnd = (UTF16 *) (string->chars); + outputEnd = (UTF8 *) (utf8String->chars); + + // Call the Unicode converter + // + cResult = ConvertUTF16toUTF8 + ( + (const UTF16**)&inputEnd, + ((const UTF16 *)(string->chars)) + string->len, + &outputEnd, + outputEnd + utf8String->size - 1, + lenientConversion + ); + + // We don't really care if things failed or not here, we just converted + // everything that was vaguely possible and stopped when it wasn't. It is + // up to the grammar programmer to verify that the input is sensible. + // + utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars); + + *(outputEnd+1) = '\0'; // Always null terminate + } + } + return utf8String; +} + +/** + * Creates a new string with enough capacity for size 8 bit characters plus a terminator. + * + * \param[in] factory - Pointer to the string factory that owns strings + * \param[in] size - In characters + * \return pointer to the new string. + */ +static pANTLR3_STRING +newSize8 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size) +{ + pANTLR3_STRING string; + + string = factory->newRaw(factory); + + if (string == NULL) + { + return string; + } + + /* Always add one more byte for a terminator ;-) + */ + string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1))); + *(string->chars) = '\0'; + string->size = size + 1; + + + return string; +} +/** + * Creates a new string with enough capacity for size UTF16 characters plus a terminator. 
+ * + * \param[in] factory - Pointer to the string factory that owns strings + * \param[in] size - In characters (count double for surrogate pairs!!!) + * \return pointer to the new string. + */ +static pANTLR3_STRING +newSizeUTF16 (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size) +{ + pANTLR3_STRING string; + + string = factory->newRaw(factory); + + if (string == NULL) + { + return string; + } + + /* Always add one more byte for a terminator ;-) + */ + string->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1))); + *(string->chars) = '\0'; + string->size = size+1; /* Size is always in characters, as is len */ + + return string; +} + +/** Creates a new 8 bit string initialized with the 8 bit characters at the + * supplied ptr, of pre-determined size. + * \param[in] factory - Pointer to the string factory that owns the strings + * \param[in] ptr - Pointer to 8 bit encoded characters + * \return pointer to the new string + */ +static pANTLR3_STRING +newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) +{ + pANTLR3_STRING string; + + string = factory->newSize(factory, size); + + if (string == NULL) + { + return NULL; + } + + if (size <= 0) + { + return string; + } + + if (ptr != NULL) + { + ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size); + *(string->chars + size) = '\0'; /* Terminate, these strings are usually used for Token streams and printing etc. */ + string->len = size; + } + + return string; +} + +/** Creates a new UTF16 string initialized with the 8 bit characters at the + * supplied 8 bit character ptr, of pre-determined size. 
+ * \param[in] factory - Pointer to the string factory that owns the strings + * \param[in] ptr - Pointer to 8 bit encoded characters + * \return pointer to the new string + */ +static pANTLR3_STRING +newPtrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) +{ + pANTLR3_STRING string; + + /* newSize accepts size in characters, not bytes + */ + string = factory->newSize(factory, size); + + if (string == NULL) + { + return NULL; + } + + if (size <= 0) + { + return string; + } + + if (ptr != NULL) + { + pANTLR3_UINT16 out; + ANTLR3_INT32 inSize; + + out = (pANTLR3_UINT16)(string->chars); + inSize = size; + + while (inSize-- > 0) + { + *out++ = (ANTLR3_UINT16)(*ptr++); + } + + /* Terminate, these strings are usually used for Token streams and printing etc. + */ + *(((pANTLR3_UINT16)(string->chars)) + size) = '\0'; + + string->len = size; + } + + return string; +} + +/** Creates a new UTF16 string initialized with the UTF16 characters at the + * supplied ptr, of pre-determined size. + * \param[in] factory - Pointer to the string factory that owns the strings + * \param[in] ptr - Pointer to UTF16 encoded characters + * \return pointer to the new string + */ +static pANTLR3_STRING +newPtrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size) +{ + pANTLR3_STRING string; + + string = factory->newSize(factory, size); + + if (string == NULL) + { + return NULL; + } + + if (size <= 0) + { + return string; + } + + if (ptr != NULL) + { + ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16))); + + /* Terminate, these strings are usually used for Token streams and printing etc. + */ + *(((pANTLR3_UINT16)(string->chars)) + size) = '\0'; + string->len = size; + } + + return string; +} + +/** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer. + * \param[in] factory - Pointer to the string factory that owns strings. 
+ * \param[in] ptr - Pointer to the 8 bit encoded string + * \return Pointer to the newly initialized string + */ +static pANTLR3_STRING +newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) +{ + return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr)); +} + +/** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer. + * \param[in] factory - Pointer to the string factory that owns strings. + * \param[in] ptr - Pointer to the 8 bit encoded string + * \return POinter to the newly initialized string + */ +static pANTLR3_STRING +newStrUTF16_8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) +{ + return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr)); +} + +/** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer. + * \param[in] factory - Pointer to the string factory that owns strings. + * \param[in] ptr - Pointer to the UTF16 encoded string + * \return Pointer to the newly initialized string + */ +static pANTLR3_STRING +newStrUTF16_UTF16 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr) +{ + pANTLR3_UINT16 in; + ANTLR3_UINT32 count; + + /** First, determine the length of the input string + */ + in = (pANTLR3_UINT16)ptr; + count = 0; + + while (*in++ != '\0') + { + count++; + } + return factory->newPtr(factory, ptr, count); +} + +static void +destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string) +{ + // Record which string we are deleting + // + ANTLR3_UINT32 strIndex = string->index; + + // Ensure that the string was not factory made, or we would try + // to delete memory that wasn't allocated outside the factory + // block. + // Remove the specific indexed string from the vector + // + factory->strings->del(factory->strings, strIndex); + + // One less string in the vector, so decrement the factory index + // so that the next string allocated is indexed correctly with + // respect to the vector. 
+ // + factory->index--; + + // Now we have to reindex the strings in the vector that followed + // the one we just deleted. We only do this if the one we just deleted + // was not the last one. + // + if (strIndex< factory->index) + { + // We must reindex the strings after the one we just deleted. + // The one that follows the one we just deleted is also out + // of whack, so we start there. + // + ANTLR3_UINT32 i; + + for (i = strIndex; i < factory->index; i++) + { + // Renumber the entry + // + ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i; + } + } + + // The string has been destroyed and the elements of the factory are reindexed. + // + +} + +static pANTLR3_STRING +printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr) +{ + pANTLR3_STRING string; + + /* We don't need to be too efficient here, this is mostly for error messages and so on. + */ + pANTLR3_UINT8 scannedText; + ANTLR3_UINT32 i; + + /* Assume we need as much as twice as much space to parse out the control characters + */ + string = factory->newSize(factory, instr->len *2 + 1); + + /* Scan through and replace unprintable (in terms of this routine) + * characters + */ + scannedText = string->chars; + + for (i = 0; i < instr->len; i++) + { + if (*(instr->chars + i) == '\n') + { + *scannedText++ = '\\'; + *scannedText++ = 'n'; + } + else if (*(instr->chars + i) == '\r') + { + *scannedText++ = '\\'; + *scannedText++ = 'r'; + } + else if (!isprint(*(instr->chars +i))) + { + *scannedText++ = '?'; + } + else + { + *scannedText++ = *(instr->chars + i); + } + } + *scannedText = '\0'; + + string->len = (ANTLR3_UINT32)(scannedText - string->chars); + + return string; +} + +static pANTLR3_STRING +printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr) +{ + pANTLR3_STRING string; + + /* We don't need to be too efficient here, this is mostly for error messages and so on. 
+ */ + pANTLR3_UINT16 scannedText; + pANTLR3_UINT16 inText; + ANTLR3_UINT32 i; + ANTLR3_UINT32 outLen; + + /* Assume we need as much as twice as much space to parse out the control characters + */ + string = factory->newSize(factory, instr->len *2 + 1); + + /* Scan through and replace unprintable (in terms of this routine) + * characters + */ + scannedText = (pANTLR3_UINT16)(string->chars); + inText = (pANTLR3_UINT16)(instr->chars); + outLen = 0; + + for (i = 0; i < instr->len; i++) + { + if (*(inText + i) == '\n') + { + *scannedText++ = '\\'; + *scannedText++ = 'n'; + outLen += 2; + } + else if (*(inText + i) == '\r') + { + *scannedText++ = '\\'; + *scannedText++ = 'r'; + outLen += 2; + } + else if (!isprint(*(inText +i))) + { + *scannedText++ = '?'; + outLen++; + } + else + { + *scannedText++ = *(inText + i); + outLen++; + } + } + *scannedText = '\0'; + + string->len = outLen; + + return string; +} + +/** Fascist Capitalist Pig function created + * to oppress the workers comrade. + */ +static void +closeFactory (pANTLR3_STRING_FACTORY factory) +{ + /* Delete the vector we were tracking the strings with, this will + * causes all the allocated strings to be deallocated too + */ + factory->strings->free(factory->strings); + + /* Delete the space for the factory itself + */ + ANTLR3_FREE((void *)factory); +} + +static pANTLR3_UINT8 +append8 (pANTLR3_STRING string, const char * newbit) +{ + ANTLR3_UINT32 len; + + len = (ANTLR3_UINT32)strlen(newbit); + + if (string->size < (string->len + len + 1)) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1)); + string->size = string->len + len + 1; + } + + /* Note we copy one more byte than the strlen in order to get the trailing + */ + ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1)); + string->len += len; + + return string->chars; +} + +static pANTLR3_UINT8 +appendUTF16_8 (pANTLR3_STRING string, const char * newbit) +{ + 
ANTLR3_UINT32 len; + pANTLR3_UINT16 apPoint; + ANTLR3_UINT32 count; + + len = (ANTLR3_UINT32)strlen(newbit); + + if (string->size < (string->len + len + 1)) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1)))); + string->size = string->len + len + 1; + } + + apPoint = ((pANTLR3_UINT16)string->chars) + string->len; + string->len += len; + + for (count = 0; count < len; count++) + { + *apPoint++ = *(newbit + count); + } + *apPoint = '\0'; + + return string->chars; +} + +static pANTLR3_UINT8 +appendUTF16_UTF16 (pANTLR3_STRING string, const char * newbit) +{ + ANTLR3_UINT32 len; + pANTLR3_UINT16 in; + + /** First, determine the length of the input string + */ + in = (pANTLR3_UINT16)newbit; + len = 0; + + while (*in++ != '\0') + { + len++; + } + + if (string->size < (string->len + len + 1)) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) )); + string->size = string->len + len + 1; + } + + /* Note we copy one more byte than the strlen in order to get the trailing delimiter + */ + ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1))); + string->len += len; + + return string->chars; +} + +static pANTLR3_UINT8 +set8 (pANTLR3_STRING string, const char * chars) +{ + ANTLR3_UINT32 len; + + len = (ANTLR3_UINT32)strlen(chars); + if (string->size < len + 1) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1)); + string->size = len + 1; + } + + /* Note we copy one more byte than the strlen in order to get the trailing '\0' + */ + ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1)); + string->len = len; + + return string->chars; + +} + +static pANTLR3_UINT8 +setUTF16_8 (pANTLR3_STRING string, const char * chars) +{ + ANTLR3_UINT32 len; + ANTLR3_UINT32 count; + 
pANTLR3_UINT16 apPoint; + + len = (ANTLR3_UINT32)strlen(chars); + if (string->size < len + 1) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1))); + string->size = len + 1; + } + apPoint = ((pANTLR3_UINT16)string->chars); + string->len = len; + + for (count = 0; count < string->len; count++) + { + *apPoint++ = *(chars + count); + } + *apPoint = '\0'; + + return string->chars; +} + +static pANTLR3_UINT8 +setUTF16_UTF16 (pANTLR3_STRING string, const char * chars) +{ + ANTLR3_UINT32 len; + pANTLR3_UINT16 in; + + /** First, determine the length of the input string + */ + in = (pANTLR3_UINT16)chars; + len = 0; + + while (*in++ != '\0') + { + len++; + } + + if (string->size < len + 1) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1))); + string->size = len + 1; + } + + /* Note we copy one more byte than the strlen in order to get the trailing '\0' + */ + ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16))); + string->len = len; + + return string->chars; + +} + +static pANTLR3_UINT8 +addc8 (pANTLR3_STRING string, ANTLR3_UINT32 c) +{ + if (string->size < string->len + 2) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2)); + string->size = string->len + 2; + } + *(string->chars + string->len) = (ANTLR3_UINT8)c; + *(string->chars + string->len + 1) = '\0'; + string->len++; + + return string->chars; +} + +static pANTLR3_UINT8 +addcUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 c) +{ + pANTLR3_UINT16 ptr; + + if (string->size < string->len + 2) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2))); + string->size = string->len + 2; + } + ptr = (pANTLR3_UINT16)(string->chars); + + *(ptr + string->len) = (ANTLR3_UINT16)c; + *(ptr + string->len + 1) = 
'\0'; + string->len++; + + return string->chars; +} + +static pANTLR3_UINT8 +addi8 (pANTLR3_STRING string, ANTLR3_INT32 i) +{ + ANTLR3_UINT8 newbit[32]; + + sprintf((char *)newbit, "%d", i); + + return string->append8(string, (const char *)newbit); +} +static pANTLR3_UINT8 +addiUTF16 (pANTLR3_STRING string, ANTLR3_INT32 i) +{ + ANTLR3_UINT8 newbit[32]; + + sprintf((char *)newbit, "%d", i); + + return string->append8(string, (const char *)newbit); +} + +static pANTLR3_UINT8 +inserti8 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i) +{ + ANTLR3_UINT8 newbit[32]; + + sprintf((char *)newbit, "%d", i); + return string->insert8(string, point, (const char *)newbit); +} +static pANTLR3_UINT8 +insertiUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i) +{ + ANTLR3_UINT8 newbit[32]; + + sprintf((char *)newbit, "%d", i); + return string->insert8(string, point, (const char *)newbit); +} + +static pANTLR3_UINT8 +insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) +{ + ANTLR3_UINT32 len; + + if (point >= string->len) + { + return string->append(string, newbit); + } + + len = (ANTLR3_UINT32)strlen(newbit); + + if (len == 0) + { + return string->chars; + } + + if (string->size < (string->len + len + 1)) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1)); + string->size = string->len + len + 1; + } + + /* Move the characters we are inserting before, including the delimiter + */ + ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1)); + + /* Note we copy the exact number of bytes + */ + ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len)); + + string->len += len; + + return string->chars; +} + +static pANTLR3_UINT8 +insertUTF16_8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) +{ + ANTLR3_UINT32 len; + ANTLR3_UINT32 count; + pANTLR3_UINT16 inPoint; + + if 
(point >= string->len) + { + return string->append8(string, newbit); + } + + len = (ANTLR3_UINT32)strlen(newbit); + + if (len == 0) + { + return string->chars; + } + + if (string->size < (string->len + len + 1)) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1))); + string->size = string->len + len + 1; + } + + /* Move the characters we are inserting before, including the delimiter + */ + ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1))); + + string->len += len; + + inPoint = ((pANTLR3_UINT16)(string->chars))+point; + for (count = 0; count<len; count++) + { + *(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count)); + } + + return string->chars; +} + +static pANTLR3_UINT8 +insertUTF16_UTF16 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit) +{ + ANTLR3_UINT32 len; + pANTLR3_UINT16 in; + + if (point >= string->len) + { + return string->append(string, newbit); + } + + /** First, determine the length of the input string + */ + in = (pANTLR3_UINT16)newbit; + len = 0; + + while (*in++ != '\0') + { + len++; + } + + if (len == 0) + { + return string->chars; + } + + if (string->size < (string->len + len + 1)) + { + string->chars = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1))); + string->size = string->len + len + 1; + } + + /* Move the characters we are inserting before, including the delimiter + */ + ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1))); + + + /* Note we copy the exact number of characters + */ + ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len))); + 
+ string->len += len; + + return string->chars; +} + +static pANTLR3_UINT8 setS (pANTLR3_STRING string, pANTLR3_STRING chars) +{ + return string->set(string, (const char *)(chars->chars)); +} + +static pANTLR3_UINT8 appendS (pANTLR3_STRING string, pANTLR3_STRING newbit) +{ + /* We may be passed an empty string, in which case we just return the current pointer + */ + if (newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL) + { + return string->chars; + } + else + { + return string->append(string, (const char *)(newbit->chars)); + } +} + +static pANTLR3_UINT8 insertS (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit) +{ + return string->insert(string, point, (const char *)(newbit->chars)); +} + +/* Function that compares the text of a string to the supplied + * 8 bit character string and returns a result a la strcmp() + */ +static ANTLR3_UINT32 +compare8 (pANTLR3_STRING string, const char * compStr) +{ + return strcmp((const char *)(string->chars), compStr); +} + +/* Function that compares the text of a string with the supplied character string + * (which is assumed to be in the same encoding as the string itself) and returns a result + * a la strcmp() + */ +static ANTLR3_UINT32 +compareUTF16_8 (pANTLR3_STRING string, const char * compStr) +{ + pANTLR3_UINT16 ourString; + ANTLR3_UINT32 charDiff; + + ourString = (pANTLR3_UINT16)(string->chars); + + while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0')) + { + charDiff = *ourString - *compStr; + if (charDiff != 0) + { + return charDiff; + } + ourString++; + compStr++; + } + + /* At this point, one of the strings was terminated + */ + return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr)); + +} + +/* Function that compares the text of a string with the supplied character string + * (which is assumed to be in the same encoding as the string itself) and returns a result + * a la strcmp() + */ +static ANTLR3_UINT32 
+compareUTF16_UTF16 (pANTLR3_STRING string, const char * compStr8) +{ + pANTLR3_UINT16 ourString; + pANTLR3_UINT16 compStr; + ANTLR3_UINT32 charDiff; + + ourString = (pANTLR3_UINT16)(string->chars); + compStr = (pANTLR3_UINT16)(compStr8); + + while (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0')) + { + charDiff = *ourString - *compStr; + if (charDiff != 0) + { + return charDiff; + } + ourString++; + compStr++; + } + + /* At this point, one of the strings was terminated + */ + return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr)); +} + +/* Function that compares the text of a string with the supplied string + * (which is assumed to be in the same encoding as the string itself) and returns a result + * a la strcmp() + */ +static ANTLR3_UINT32 +compareS (pANTLR3_STRING string, pANTLR3_STRING compStr) +{ + return string->compare(string, (const char *)compStr->chars); +} + + +/* Function that returns the character indexed at the supplied + * offset as a 32 bit character. + */ +static ANTLR3_UCHAR +charAt8 (pANTLR3_STRING string, ANTLR3_UINT32 offset) +{ + if (offset > string->len) + { + return (ANTLR3_UCHAR)'\0'; + } + else + { + return (ANTLR3_UCHAR)(*(string->chars + offset)); + } +} + +/* Function that returns the character indexed at the supplied + * offset as a 32 bit character. + */ +static ANTLR3_UCHAR +charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset) +{ + if (offset > string->len) + { + return (ANTLR3_UCHAR)'\0'; + } + else + { + return (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset)); + } +} + +/* Function that returns a substring of the supplied string a la .subString(s,e) + * in java runtimes. 
+ */ +static pANTLR3_STRING +subString8 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex) +{ + pANTLR3_STRING newStr; + + if (endIndex > string->len) + { + endIndex = string->len + 1; + } + newStr = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex); + + return newStr; +} + +/* Returns a substring of the supplied string a la .subString(s,e) + * in java runtimes. + */ +static pANTLR3_STRING +subStringUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex) +{ + pANTLR3_STRING newStr; + + if (endIndex > string->len) + { + endIndex = string->len + 1; + } + newStr = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex); + + return newStr; +} + +/* Function that can convert the characters in the string to an integer + */ +static ANTLR3_INT32 +toInt32_8 (struct ANTLR3_STRING_struct * string) +{ + return atoi((const char *)(string->chars)); +} + +/* Function that can convert the characters in the string to an integer + */ +static ANTLR3_INT32 +toInt32_UTF16 (struct ANTLR3_STRING_struct * string) +{ + pANTLR3_UINT16 input; + ANTLR3_INT32 value; + ANTLR3_BOOLEAN negate; + + value = 0; + input = (pANTLR3_UINT16)(string->chars); + negate = ANTLR3_FALSE; + + if (*input == (ANTLR3_UCHAR)'-') + { + negate = ANTLR3_TRUE; + input++; + } + else if (*input == (ANTLR3_UCHAR)'+') + { + input++; + } + + while (*input != '\0' && isdigit(*input)) + { + value = value * 10; + value += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0'); + input++; + } + + return negate ? -value : value; +} + +/* Function that returns a pointer to an 8 bit version of the string, + * which in this case is just the string as this is + * 8 bit encodiing anyway. 
+ */ +static pANTLR3_STRING to8_8 (pANTLR3_STRING string) +{ + return string; +} + +/* Function that returns an 8 bit version of the string, + * which in this case is returning all the UTF16 characters + * narrowed back into 8 bits, with characters that are too large + * replaced with '_' + */ +static pANTLR3_STRING to8_UTF16 (pANTLR3_STRING string) +{ + pANTLR3_STRING newStr; + ANTLR3_UINT32 i; + + /* Create a new 8 bit string + */ + newStr = newRaw8(string->factory); + + if (newStr == NULL) + { + return NULL; + } + + /* Always add one more byte for a terminator + */ + newStr->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1)); + newStr->size = string->len + 1; + newStr->len = string->len; + + /* Now copy each UTF16 charActer , making it an 8 bit character of + * some sort. + */ + for (i=0; i<string->len; i++) + { + ANTLR3_UCHAR c; + + c = *(((pANTLR3_UINT16)(string->chars)) + i); + + *(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c); + } + + /* Terminate + */ + *(newStr->chars + newStr->len) = '\0'; + + return newStr; +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3tokenstream.c b/impl/antlr/libantlr3c-3.4/src/antlr3tokenstream.c new file mode 100644 index 0000000..ec06f15 --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3tokenstream.c @@ -0,0 +1,1091 @@ +/// \file +/// Default implementation of CommonTokenStream +/// + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3tokenstream.h> + +#ifdef ANTLR3_WINDOWS +#pragma warning( disable : 4100 ) +#endif + +// COMMON_TOKEN_STREAM API +// +static void setTokenTypeChannel (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel); +static void discardTokenType (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_INT32 ttype); +static void discardOffChannel (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_BOOLEAN discard); +static pANTLR3_VECTOR getTokens (pANTLR3_COMMON_TOKEN_STREAM cts); +static pANTLR3_LIST getTokenRange (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); +static pANTLR3_LIST getTokensSet (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types); +static pANTLR3_LIST getTokensList (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list); +static pANTLR3_LIST getTokensType (pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type); +static void reset (pANTLR3_COMMON_TOKEN_STREAM cts); + +// TOKEN_STREAM API +// +static pANTLR3_COMMON_TOKEN tokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k); +static pANTLR3_COMMON_TOKEN dbgTokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k); +static pANTLR3_COMMON_TOKEN get (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 i); +static pANTLR3_TOKEN_SOURCE getTokenSource (pANTLR3_TOKEN_STREAM ts); +static void setTokenSource (pANTLR3_TOKEN_STREAM ts, pANTLR3_TOKEN_SOURCE tokenSource); +static pANTLR3_STRING toString (pANTLR3_TOKEN_STREAM ts); +static pANTLR3_STRING toStringSS (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop); +static pANTLR3_STRING toStringTT (pANTLR3_TOKEN_STREAM ts, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop); +static void setDebugListener (pANTLR3_TOKEN_STREAM ts, pANTLR3_DEBUG_EVENT_LISTENER debugger); + +// INT STREAM API +// +static void consume (pANTLR3_INT_STREAM is); +static void dbgConsume (pANTLR3_INT_STREAM is); +static ANTLR3_UINT32 _LA 
(pANTLR3_INT_STREAM is, ANTLR3_INT32 i); +static ANTLR3_UINT32 dbgLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i); +static ANTLR3_MARKER mark (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER dbgMark (pANTLR3_INT_STREAM is); +static void release (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark); +static ANTLR3_UINT32 size (pANTLR3_INT_STREAM is); +static ANTLR3_MARKER tindex (pANTLR3_INT_STREAM is); +static void rewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void dbgRewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker); +static void rewindLast (pANTLR3_INT_STREAM is); +static void dbgRewindLast (pANTLR3_INT_STREAM is); +static void seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index); +static void dbgSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index); +static pANTLR3_STRING getSourceName (pANTLR3_INT_STREAM is); +static void antlr3TokenStreamFree (pANTLR3_TOKEN_STREAM stream); +static void antlr3CTSFree (pANTLR3_COMMON_TOKEN_STREAM stream); + +// Helpers +// +static void fillBuffer (pANTLR3_COMMON_TOKEN_STREAM tokenStream); +static ANTLR3_UINT32 skipOffTokenChannels (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 i); +static ANTLR3_UINT32 skipOffTokenChannelsReverse (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 i); +static pANTLR3_COMMON_TOKEN LB (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 i); + +ANTLR3_API pANTLR3_TOKEN_STREAM +antlr3TokenStreamNew() +{ + pANTLR3_TOKEN_STREAM stream; + + // Memory for the interface structure + // + stream = (pANTLR3_TOKEN_STREAM) ANTLR3_MALLOC(sizeof(ANTLR3_TOKEN_STREAM)); + + if (stream == NULL) + { + return NULL; + } + + // Install basic API + // + stream->free = antlr3TokenStreamFree; + + + return stream; +} + +static void +antlr3TokenStreamFree(pANTLR3_TOKEN_STREAM stream) +{ + ANTLR3_FREE(stream); +} + +static void +antlr3CTSFree (pANTLR3_COMMON_TOKEN_STREAM stream) +{ + // We only free up our subordinate interfaces if they belong + // to us, otherwise we let whoever owns them deal with 
them. + // + if (stream->tstream->super == stream) + { + if (stream->tstream->istream->super == stream->tstream) + { + stream->tstream->istream->free(stream->tstream->istream); + stream->tstream->istream = NULL; + } + stream->tstream->free(stream->tstream); + } + + // Now we free our own resources + // + if (stream->tokens != NULL) + { + stream->tokens->free(stream->tokens); + stream->tokens = NULL; + } + if (stream->discardSet != NULL) + { + stream->discardSet->free(stream->discardSet); + stream->discardSet = NULL; + } + if (stream->channelOverrides != NULL) + { + stream->channelOverrides->free(stream->channelOverrides); + stream->channelOverrides = NULL; + } + + // Free our memory now + // + ANTLR3_FREE(stream); +} + +// Reset a token stream so it can be used again and can reuse it's +// resources. +// +static void +reset (pANTLR3_COMMON_TOKEN_STREAM cts) +{ + + // Free any resources that ar most like specifc to the + // run we just did. + // + if (cts->discardSet != NULL) + { + cts->discardSet->free(cts->discardSet); + cts->discardSet = NULL; + } + if (cts->channelOverrides != NULL) + { + cts->channelOverrides->free(cts->channelOverrides); + cts->channelOverrides = NULL; + } + + // Now, if there were any existing tokens in the stream, + // then we just reset the vector count so that it starts + // again. We must traverse the entries unfortunately as + // there may be free pointers for custom token types and + // so on. However that is just a quick NULL check on the + // vector entries. 
+ // + if (cts->tokens != NULL) + { + cts->tokens->clear(cts->tokens); + } + else + { + /* Install the token tracking tables + */ + cts->tokens = antlr3VectorNew(0); + } + + // Reset to defaults + // + cts->discardOffChannel = ANTLR3_FALSE; + cts->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; + cts->p = -1; +} + +ANTLR3_API pANTLR3_COMMON_TOKEN_STREAM +antlr3CommonTokenDebugStreamSourceNew(ANTLR3_UINT32 hint, pANTLR3_TOKEN_SOURCE source, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + pANTLR3_COMMON_TOKEN_STREAM stream; + + // Create a standard token stream + // + stream = antlr3CommonTokenStreamSourceNew(hint, source); + + // Install the debugger object + // + stream->tstream->debugger = debugger; + + // Override standard token stream methods with debugging versions + // + stream->tstream->initialStreamState = ANTLR3_FALSE; + + stream->tstream->_LT = dbgTokLT; + + stream->tstream->istream->consume = dbgConsume; + stream->tstream->istream->_LA = dbgLA; + stream->tstream->istream->mark = dbgMark; + stream->tstream->istream->rewind = dbgRewindStream; + stream->tstream->istream->rewindLast = dbgRewindLast; + stream->tstream->istream->seek = dbgSeek; + + return stream; +} + +ANTLR3_API pANTLR3_COMMON_TOKEN_STREAM +antlr3CommonTokenStreamSourceNew(ANTLR3_UINT32 hint, pANTLR3_TOKEN_SOURCE source) +{ + pANTLR3_COMMON_TOKEN_STREAM stream; + + stream = antlr3CommonTokenStreamNew(hint); + + stream->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; + + stream->channelOverrides = NULL; + stream->discardSet = NULL; + stream->discardOffChannel = ANTLR3_FALSE; + + stream->tstream->setTokenSource(stream->tstream, source); + + stream->free = antlr3CTSFree; + return stream; +} + +ANTLR3_API pANTLR3_COMMON_TOKEN_STREAM +antlr3CommonTokenStreamNew(ANTLR3_UINT32 hint) +{ + pANTLR3_COMMON_TOKEN_STREAM stream; + + /* Memory for the interface structure + */ + stream = (pANTLR3_COMMON_TOKEN_STREAM) ANTLR3_MALLOC(sizeof(ANTLR3_COMMON_TOKEN_STREAM)); + + if (stream == NULL) + { + return NULL; + } + + /* Create 
space for the token stream interface + */ + stream->tstream = antlr3TokenStreamNew(); + stream->tstream->super = stream; + + /* Create space for the INT_STREAM interfacce + */ + stream->tstream->istream = antlr3IntStreamNew(); + stream->tstream->istream->super = (stream->tstream); + stream->tstream->istream->type = ANTLR3_TOKENSTREAM; + + /* Install the token tracking tables + */ + stream->tokens = antlr3VectorNew(0); + + /* Defaults + */ + stream->p = -1; + + /* Install the common token stream API + */ + stream->setTokenTypeChannel = setTokenTypeChannel; + stream->discardTokenType = discardTokenType; + stream->discardOffChannelToks = discardOffChannel; + stream->getTokens = getTokens; + stream->getTokenRange = getTokenRange; + stream->getTokensSet = getTokensSet; + stream->getTokensList = getTokensList; + stream->getTokensType = getTokensType; + stream->reset = reset; + + /* Install the token stream API + */ + stream->tstream->_LT = tokLT; + stream->tstream->get = get; + stream->tstream->getTokenSource = getTokenSource; + stream->tstream->setTokenSource = setTokenSource; + stream->tstream->toString = toString; + stream->tstream->toStringSS = toStringSS; + stream->tstream->toStringTT = toStringTT; + stream->tstream->setDebugListener = setDebugListener; + + /* Install INT_STREAM interface + */ + stream->tstream->istream->_LA = _LA; + stream->tstream->istream->mark = mark; + stream->tstream->istream->release = release; + stream->tstream->istream->size = size; + stream->tstream->istream->index = tindex; + stream->tstream->istream->rewind = rewindStream; + stream->tstream->istream->rewindLast= rewindLast; + stream->tstream->istream->seek = seek; + stream->tstream->istream->consume = consume; + stream->tstream->istream->getSourceName = getSourceName; + + return stream; +} + +// Install a debug listener adn switch to debug mode methods +// +static void +setDebugListener (pANTLR3_TOKEN_STREAM ts, pANTLR3_DEBUG_EVENT_LISTENER debugger) +{ + // Install the debugger object + 
// + ts->debugger = debugger; + + // Override standard token stream methods with debugging versions + // + ts->initialStreamState = ANTLR3_FALSE; + + ts->_LT = dbgTokLT; + + ts->istream->consume = dbgConsume; + ts->istream->_LA = dbgLA; + ts->istream->mark = dbgMark; + ts->istream->rewind = dbgRewindStream; + ts->istream->rewindLast = dbgRewindLast; + ts->istream->seek = dbgSeek; +} + +/** Get the ith token from the current position 1..n where k=1 is the +* first symbol of lookahead. +*/ +static pANTLR3_COMMON_TOKEN +tokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k) +{ + ANTLR3_INT32 i; + ANTLR3_INT32 n; + pANTLR3_COMMON_TOKEN_STREAM cts; + + cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super; + + if (k < 0) + { + return LB(cts, -k); + } + + if (cts->p == -1) + { + fillBuffer(cts); + } + + // Here we used to check for k == 0 and return 0, but this seems + // a superfluous check to me. LT(k=0) is therefore just undefined + // and we won't waste the clock cycles on the check + // + + if ((cts->p + k - 1) >= (ANTLR3_INT32)ts->istream->cachedSize) + { + pANTLR3_COMMON_TOKEN teof = &(ts->tokenSource->eofToken); + + teof->setStartIndex (teof, ts->istream->index (ts->istream)); + teof->setStopIndex (teof, ts->istream->index (ts->istream)); + return teof; + } + + i = cts->p; + n = 1; + + /* Need to find k good tokens, skipping ones that are off channel + */ + while ( n < k) + { + /* Skip off-channel tokens */ + i = skipOffTokenChannels(cts, i+1); /* leave p on valid token */ + n++; + } + if ( (ANTLR3_UINT32) i >= ts->istream->cachedSize) + { + pANTLR3_COMMON_TOKEN teof = &(ts->tokenSource->eofToken); + + teof->setStartIndex (teof, ts->istream->index(ts->istream)); + teof->setStopIndex (teof, ts->istream->index(ts->istream)); + return teof; + } + + // Here the token must be in the input vector. 
Rather then incur + // function call penalty, we just return the pointer directly + // from the vector + // + return (pANTLR3_COMMON_TOKEN)cts->tokens->elements[i].element; + //return (pANTLR3_COMMON_TOKEN)cts->tokens->get(cts->tokens, i); +} + +/// Debug only method to flag consumption of initial off-channel +/// tokens in the input stream +/// +static void +consumeInitialHiddenTokens(pANTLR3_INT_STREAM is) +{ + ANTLR3_MARKER first; + ANTLR3_INT32 i; + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + first = is->index(is); + + for (i=0; i<first; i++) + { + ts->debugger->consumeHiddenToken(ts->debugger, ts->get(ts, i)); + } + + ts->initialStreamState = ANTLR3_FALSE; + +} + +/// As per the normal tokLT but sends information to the debugger +/// +static pANTLR3_COMMON_TOKEN +dbgTokLT (pANTLR3_TOKEN_STREAM ts, ANTLR3_INT32 k) +{ + if (ts->initialStreamState == ANTLR3_TRUE) + { + consumeInitialHiddenTokens(ts->istream); + } + return tokLT(ts, k); +} + +#ifdef ANTLR3_WINDOWS + /* When fully optimized VC7 complains about non reachable code. + * Not yet sure if this is an optimizer bug, or a bug in the flow analysis + */ +#pragma warning( disable : 4702 ) +#endif + +static pANTLR3_COMMON_TOKEN +LB(pANTLR3_COMMON_TOKEN_STREAM cts, ANTLR3_INT32 k) +{ + ANTLR3_INT32 i; + ANTLR3_INT32 n; + + if (cts->p == -1) + { + fillBuffer(cts); + } + if (k == 0) + { + return NULL; + } + if ((cts->p - k) < 0) + { + return NULL; + } + + i = cts->p; + n = 1; + + /* Need to find k good tokens, going backwards, skipping ones that are off channel + */ + while (n <= (ANTLR3_INT32) k) + { + /* Skip off-channel tokens + */ + + i = skipOffTokenChannelsReverse(cts, i - 1); /* leave p on valid token */ + n++; + } + if (i < 0) + { + return NULL; + } + // Here the token must be in the input vector. 
Rather then incut + // function call penalty, we jsut return the pointer directly + // from the vector + // + return (pANTLR3_COMMON_TOKEN)cts->tokens->elements[i].element; +} + +static pANTLR3_COMMON_TOKEN +get (pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 i) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + + cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super; + + return (pANTLR3_COMMON_TOKEN)(cts->tokens->get(cts->tokens, i)); /* Token index is zero based but vectors are 1 based */ +} + +static pANTLR3_TOKEN_SOURCE +getTokenSource (pANTLR3_TOKEN_STREAM ts) +{ + return ts->tokenSource; +} + +static void +setTokenSource ( pANTLR3_TOKEN_STREAM ts, + pANTLR3_TOKEN_SOURCE tokenSource) +{ + ts->tokenSource = tokenSource; +} + +static pANTLR3_STRING +toString (pANTLR3_TOKEN_STREAM ts) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + + cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super; + + if (cts->p == -1) + { + fillBuffer(cts); + } + + return ts->toStringSS(ts, 0, ts->istream->size(ts->istream)); +} + +static pANTLR3_STRING +toStringSS(pANTLR3_TOKEN_STREAM ts, ANTLR3_UINT32 start, ANTLR3_UINT32 stop) +{ + pANTLR3_STRING string; + pANTLR3_TOKEN_SOURCE tsource; + pANTLR3_COMMON_TOKEN tok; + ANTLR3_UINT32 i; + pANTLR3_COMMON_TOKEN_STREAM cts; + + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + + if (cts->p == -1) + { + fillBuffer(cts); + } + if (stop >= ts->istream->size(ts->istream)) + { + stop = ts->istream->size(ts->istream) - 1; + } + + /* Who is giving us these tokens? 
+ */ + tsource = ts->getTokenSource(ts); + + if (tsource != NULL && cts->tokens != NULL) + { + /* Finally, let's get a string + */ + string = tsource->strFactory->newRaw(tsource->strFactory); + + for (i = start; i <= stop; i++) + { + tok = ts->get(ts, i); + if (tok != NULL) + { + string->appendS(string, tok->getText(tok)); + } + } + + return string; + } + return NULL; + +} + +static pANTLR3_STRING +toStringTT (pANTLR3_TOKEN_STREAM ts, pANTLR3_COMMON_TOKEN start, pANTLR3_COMMON_TOKEN stop) +{ + if (start != NULL && stop != NULL) + { + return ts->toStringSS(ts, (ANTLR3_UINT32)start->getTokenIndex(start), (ANTLR3_UINT32)stop->getTokenIndex(stop)); + } + else + { + return NULL; + } +} + +/** Move the input pointer to the next incoming token. The stream + * must become active with LT(1) available. consume() simply + * moves the input pointer so that LT(1) points at the next + * input symbol. Consume at least one token. + * + * Walk past any token not on the channel the parser is listening to. + */ +static void +consume (pANTLR3_INT_STREAM is) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + + if ((ANTLR3_UINT32)cts->p < cts->tokens->count) + { + cts->p++; + cts->p = skipOffTokenChannels(cts, cts->p); + } +} + + +/// As per ordinary consume but notifies the debugger about hidden +/// tokens and so on. +/// +static void +dbgConsume (pANTLR3_INT_STREAM is) +{ + pANTLR3_TOKEN_STREAM ts; + ANTLR3_MARKER a; + ANTLR3_MARKER b; + pANTLR3_COMMON_TOKEN t; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + if (ts->initialStreamState == ANTLR3_TRUE) + { + consumeInitialHiddenTokens(is); + } + + a = is->index(is); // Where are we right now? + t = ts->_LT(ts, 1); // Current token from stream + + consume(is); // Standard consumer + + b = is->index(is); // Where are we after consuming 1 on channel token? 
+ + ts->debugger->consumeToken(ts->debugger, t); // Tell the debugger that we consumed the first token + + if (b>a+1) + { + // The standard consume caused the index to advance by more than 1, + // which can only happen if it skipped some off-channel tokens. + // we need to tell the debugger about those tokens. + // + ANTLR3_MARKER i; + + for (i = a+1; i<b; i++) + { + ts->debugger->consumeHiddenToken(ts->debugger, ts->get(ts, (ANTLR3_UINT32)i)); + } + + } +} + +/** A simple filter mechanism whereby you can tell this token stream + * to force all tokens of type ttype to be on channel. For example, + * when interpreting, we cannot execute actions so we need to tell + * the stream to force all WS and NEWLINE to be a different, ignored, + * channel. + */ +static void +setTokenTypeChannel (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 ttype, ANTLR3_UINT32 channel) +{ + if (tokenStream->channelOverrides == NULL) + { + tokenStream->channelOverrides = antlr3ListNew(10); + } + + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. + */ + tokenStream->channelOverrides->put(tokenStream->channelOverrides, ttype, ANTLR3_FUNC_PTR((ANTLR3_UINT32)channel + 1), NULL); +} + +static void +discardTokenType (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 ttype) +{ + if (tokenStream->discardSet == NULL) + { + tokenStream->discardSet = antlr3ListNew(31); + } + + /* We add one to the channel so we can distinguish NULL as being no entry in the + * table for a particular token type. We could use bitsets for this I suppose too. 
+ */ + tokenStream->discardSet->put(tokenStream->discardSet, ttype, ANTLR3_FUNC_PTR((ANTLR3_UINT32)ttype + 1), NULL); +} + +static void +discardOffChannel (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_BOOLEAN discard) +{ + tokenStream->discardOffChannel = discard; +} + +static pANTLR3_VECTOR +getTokens (pANTLR3_COMMON_TOKEN_STREAM tokenStream) +{ + if (tokenStream->p == -1) + { + fillBuffer(tokenStream); + } + + return tokenStream->tokens; +} + +static pANTLR3_LIST +getTokenRange (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop) +{ + return tokenStream->getTokensSet(tokenStream, start, stop, NULL); +} +/** Given a start and stop index, return a List of all tokens in + * the token type BitSet. Return null if no tokens were found. This + * method looks at both on and off channel tokens. + */ +static pANTLR3_LIST +getTokensSet (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_BITSET types) +{ + pANTLR3_LIST filteredList; + ANTLR3_UINT32 i; + ANTLR3_UINT32 n; + pANTLR3_COMMON_TOKEN tok; + + if (tokenStream->p == -1) + { + fillBuffer(tokenStream); + } + if (stop > tokenStream->tstream->istream->size(tokenStream->tstream->istream)) + { + stop = tokenStream->tstream->istream->size(tokenStream->tstream->istream); + } + if (start > stop) + { + return NULL; + } + + /* We have the range set, now we need to iterate through the + * installed tokens and create a new list with just the ones we want + * in it. We are just moving pointers about really. + */ + filteredList = antlr3ListNew((ANTLR3_UINT32)tokenStream->tstream->istream->size(tokenStream->tstream->istream)); + + for (i = start, n = 0; i<= stop; i++) + { + tok = tokenStream->tstream->get(tokenStream->tstream, i); + + if ( types == NULL + || types->isMember(types, tok->getType(tok) == ANTLR3_TRUE) + ) + { + filteredList->put(filteredList, n++, (void *)tok, NULL); + } + } + + /* Did we get any then? 
+ */ + if (filteredList->size(filteredList) == 0) + { + filteredList->free(filteredList); + filteredList = NULL; + } + + return filteredList; +} + +static pANTLR3_LIST +getTokensList (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, pANTLR3_LIST list) +{ + pANTLR3_BITSET bitSet; + pANTLR3_LIST newlist; + + bitSet = antlr3BitsetList(list->table); + + newlist = tokenStream->getTokensSet(tokenStream, start, stop, bitSet); + + bitSet->free(bitSet); + + return newlist; + +} + +static pANTLR3_LIST +getTokensType (pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_UINT32 start, ANTLR3_UINT32 stop, ANTLR3_UINT32 type) +{ + pANTLR3_BITSET bitSet; + pANTLR3_LIST newlist; + + bitSet = antlr3BitsetOf(type, -1); + newlist = tokenStream->getTokensSet(tokenStream, start, stop, bitSet); + + bitSet->free(bitSet); + + return newlist; +} + +static ANTLR3_UINT32 +_LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i) +{ + pANTLR3_TOKEN_STREAM ts; + pANTLR3_COMMON_TOKEN tok; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + tok = ts->_LT(ts, i); + + if (tok != NULL) + { + return tok->getType(tok); + } + else + { + return ANTLR3_TOKEN_INVALID; + } +} + +/// As per _LA() but for debug mode. 
+/// +static ANTLR3_UINT32 +dbgLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 i) +{ + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + if (ts->initialStreamState == ANTLR3_TRUE) + { + consumeInitialHiddenTokens(is); + } + ts->debugger->LT(ts->debugger, i, tokLT(ts, i)); + return _LA(is, i); +} + +static ANTLR3_MARKER +mark (pANTLR3_INT_STREAM is) +{ + is->lastMarker = is->index(is); + return is->lastMarker; +} + +/// As per mark() but with a call to tell the debugger we are doing this +/// +static ANTLR3_MARKER +dbgMark (pANTLR3_INT_STREAM is) +{ + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + is->lastMarker = is->index(is); + ts->debugger->mark(ts->debugger, is->lastMarker); + + return is->lastMarker; +} + +static void +release (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) +{ + return; +} + +static ANTLR3_UINT32 +size (pANTLR3_INT_STREAM is) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + + if (is->cachedSize > 0) + { + return is->cachedSize; + } + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + + is->cachedSize = cts->tokens->count; + return is->cachedSize; +} + +static ANTLR3_MARKER +tindex (pANTLR3_INT_STREAM is) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + + return cts->p; +} + +static void +dbgRewindLast (pANTLR3_INT_STREAM is) +{ + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + ts->debugger->rewindLast(ts->debugger); + + is->rewind(is, is->lastMarker); +} +static void +rewindLast (pANTLR3_INT_STREAM is) +{ + is->rewind(is, is->lastMarker); +} +static void +rewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ + is->seek(is, (ANTLR3_UINT32)(marker)); +} +static void +dbgRewindStream (pANTLR3_INT_STREAM is, ANTLR3_MARKER marker) +{ + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + + 
ts->debugger->rewind(ts->debugger, marker); + + is->seek(is, (ANTLR3_UINT32)(marker)); +} + +static void +seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index) +{ + pANTLR3_COMMON_TOKEN_STREAM cts; + pANTLR3_TOKEN_STREAM ts; + + ts = (pANTLR3_TOKEN_STREAM) is->super; + cts = (pANTLR3_COMMON_TOKEN_STREAM) ts->super; + + cts->p = (ANTLR3_UINT32)index; +} +static void +dbgSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER index) +{ + // TODO: Implement seek in debugger when Ter adds it to Java + // + seek(is, index); +} +ANTLR3_API void +fillBufferExt(pANTLR3_COMMON_TOKEN_STREAM tokenStream) +{ + fillBuffer(tokenStream); +} +static void +fillBuffer(pANTLR3_COMMON_TOKEN_STREAM tokenStream) { + ANTLR3_UINT32 index; + pANTLR3_COMMON_TOKEN tok; + ANTLR3_BOOLEAN discard; + void * channelI; + + /* Start at index 0 of course + */ + index = 0; + + /* Pick out the next token from the token source + * Remember we just get a pointer (reference if you like) here + * and so if we store it anywhere, we don't set any pointers to auto free it. + */ + tok = tokenStream->tstream->tokenSource->nextToken(tokenStream->tstream->tokenSource); + + while (tok != NULL && tok->type != ANTLR3_TOKEN_EOF) + { + discard = ANTLR3_FALSE; /* Assume we are not discarding */ + + /* I employ a bit of a trick, or perhaps hack here. Rather than + * store a pointer to a structure in the override map and discard set + * we store the value + 1 cast to a void *. 
Hence on systems where NULL = (void *)0 + * we can distinguish "not being there" from "being channel or type 0" + */ + + if (tokenStream->discardSet != NULL + && tokenStream->discardSet->get(tokenStream->discardSet, tok->getType(tok)) != NULL) + { + discard = ANTLR3_TRUE; + } + else if ( tokenStream->discardOffChannel == ANTLR3_TRUE + && tok->getChannel(tok) != tokenStream->channel + ) + { + discard = ANTLR3_TRUE; + } + else if (tokenStream->channelOverrides != NULL) + { + /* See if this type is in the override map + */ + channelI = tokenStream->channelOverrides->get(tokenStream->channelOverrides, tok->getType(tok) + 1); + + if (channelI != NULL) + { + /* Override found + */ + tok->setChannel(tok, ANTLR3_UINT32_CAST(channelI) - 1); + } + } + + /* If not discarding it, add it to the list at the current index + */ + if (discard == ANTLR3_FALSE) + { + /* Add it, indicating that we will delete it and the table should not + */ + tok->setTokenIndex(tok, index); + tokenStream->p++; + tokenStream->tokens->add(tokenStream->tokens, (void *) tok, NULL); + index++; + } + + tok = tokenStream->tstream->tokenSource->nextToken(tokenStream->tstream->tokenSource); + } + + /* Cache the size so we don't keep doing indirect method calls. We do this as + * early as possible so that anything after this may utilize the cached value. + */ + tokenStream->tstream->istream->cachedSize = tokenStream->tokens->count; + + /* Set the consume pointer to the first token that is on our channel + */ + tokenStream->p = 0; + tokenStream->p = skipOffTokenChannels(tokenStream, tokenStream->p); + +} + +/// Given a starting index, return the index of the first on-channel +/// token. 
+/// +static ANTLR3_UINT32 +skipOffTokenChannels(pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 i) { + ANTLR3_INT32 n; + pANTLR3_COMMON_TOKEN tok; + + n = tokenStream->tstream->istream->cachedSize; + + while (i < n) + { + tok = (pANTLR3_COMMON_TOKEN)tokenStream->tokens->elements[i].element; + + if (tok->channel!= tokenStream->channel) + { + i++; + } + else + { + return i; + } + } + return i; +} + +static ANTLR3_UINT32 +skipOffTokenChannelsReverse(pANTLR3_COMMON_TOKEN_STREAM tokenStream, ANTLR3_INT32 x) +{ + pANTLR3_COMMON_TOKEN tok; + + while (x >= 0) + { + tok = (pANTLR3_COMMON_TOKEN)tokenStream->tokens->elements[x].element; + + if ((tok->channel != tokenStream->channel)) + { + x--; + } + else + { + return x; + } + } + return x; +} + +/// Return a string that represents the name assoicated with the input source +/// +/// /param[in] is The ANTLR3_INT_STREAM interface that is representing this token stream. +/// +/// /returns +/// /implements ANTLR3_INT_STREAM_struct::getSourceName() +/// +static pANTLR3_STRING +getSourceName (pANTLR3_INT_STREAM is) +{ + // Slightly convoluted as we must trace back to the lexer's input source + // via the token source. The streamName that is here is not initialized + // because this is a token stream, not a file or string stream, which are the + // only things that have a context for a source name. + // + return ((pANTLR3_TOKEN_STREAM)(is->super))->tokenSource->fileName; +} diff --git a/impl/antlr/libantlr3c-3.4/src/antlr3treeparser.c b/impl/antlr/libantlr3c-3.4/src/antlr3treeparser.c new file mode 100644 index 0000000..b7e035a --- /dev/null +++ b/impl/antlr/libantlr3c-3.4/src/antlr3treeparser.c @@ -0,0 +1,255 @@ +/** \file + * Implementation of the tree parser and overrides for the base recognizer + */ + +// [The "BSD licence"] +// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC +// http://www.temporal-wave.com +// http://www.linkedin.com/in/jimidle +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. The name of the author may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include <antlr3treeparser.h> + +/* BASE Recognizer overrides + */ +static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow); + +/* Tree parser API + */ +static void setTreeNodeStream (pANTLR3_TREE_PARSER parser, pANTLR3_COMMON_TREE_NODE_STREAM input); +static pANTLR3_COMMON_TREE_NODE_STREAM + getTreeNodeStream (pANTLR3_TREE_PARSER parser); +static void freeParser (pANTLR3_TREE_PARSER parser); +static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); +static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, + ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); + + +ANTLR3_API pANTLR3_TREE_PARSER +antlr3TreeParserNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_COMMON_TREE_NODE_STREAM ctnstream, pANTLR3_RECOGNIZER_SHARED_STATE state) +{ + pANTLR3_TREE_PARSER parser; + + /** Allocate tree parser memory + */ + parser =(pANTLR3_TREE_PARSER) ANTLR3_MALLOC(sizeof(ANTLR3_TREE_PARSER)); + + if (parser == NULL) + { + return NULL; + } + + /* Create and install a base recognizer which does most of the work for us + */ + parser->rec = antlr3BaseRecognizerNew(ANTLR3_TYPE_PARSER, sizeHint, state); + + if (parser->rec == NULL) + { + parser->free(parser); + return NULL; + } + + /* Ensure we can track back to the tree parser super structure + * from the base recognizer structure + */ + parser->rec->super = parser; + parser->rec->type = ANTLR3_TYPE_TREE_PARSER; + + /* Install our base recognizer overrides + */ + parser->rec->mismatch = mismatch; + parser->rec->exConstruct = antlr3MTNExceptionNew; + parser->rec->getCurrentInputSymbol = getCurrentInputSymbol; + parser->rec->getMissingSymbol = getMissingSymbol; + + /* Install tree parser API + */ + parser->getTreeNodeStream = getTreeNodeStream; + parser->setTreeNodeStream = setTreeNodeStream; + parser->free = freeParser; + + /* Install the tree node stream + */ + 
parser->setTreeNodeStream(parser, ctnstream); + + return parser; +} + +/** + * \brief + * Creates a new Mismatched Tree Nde Exception and inserts in the recognizer + * exception stack. + * + * \param recognizer + * Context pointer for this recognizer + * + */ +ANTLR3_API void +antlr3MTNExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer) +{ + /* Create a basic recognition exception structure + */ + antlr3RecognitionExceptionNew(recognizer); + + /* Now update it to indicate this is a Mismatched token exception + */ + recognizer->state->exception->name = ANTLR3_MISMATCHED_TREE_NODE_NAME; + recognizer->state->exception->type = ANTLR3_MISMATCHED_TREE_NODE_EXCEPTION; + + return; +} + + +static void +freeParser (pANTLR3_TREE_PARSER parser) +{ + if (parser->rec != NULL) + { + // This may have ben a delegate or delegator parser, in which case the + // state may already have been freed (and set to NULL therefore) + // so we ignore the state if we don't have it. + // + if (parser->rec->state != NULL) + { + if (parser->rec->state->following != NULL) + { + parser->rec->state->following->free(parser->rec->state->following); + parser->rec->state->following = NULL; + } + } + parser->rec->free(parser->rec); + parser->rec = NULL; + } + + ANTLR3_FREE(parser); +} + +/** Set the input stream and reset the parser + */ +static void +setTreeNodeStream (pANTLR3_TREE_PARSER parser, pANTLR3_COMMON_TREE_NODE_STREAM input) +{ + parser->ctnstream = input; + parser->rec->reset (parser->rec); + parser->ctnstream->reset (parser->ctnstream); +} + +/** Return a pointer to the input stream + */ +static pANTLR3_COMMON_TREE_NODE_STREAM +getTreeNodeStream (pANTLR3_TREE_PARSER parser) +{ + return parser->ctnstream; +} + + +/** Override for standard base recognizer mismatch function + * as we have DOWN/UP nodes in the stream that have no line info, + * plus we want to alter the exception type. 
+ */ +static void +mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow) +{ + recognizer->exConstruct(recognizer); + recognizer->recoverFromMismatchedToken(recognizer, ttype, follow); +} + +#ifdef ANTLR3_WINDOWS +#pragma warning (push) +#pragma warning (disable : 4100) +#endif + +// Default implementation is for parser and assumes a token stream as supplied by the runtime. +// You MAY need override this function if the standard TOKEN_STREAM is not what you are using. +// +static void * +getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_COMMON_TREE_NODE_STREAM ctns; + + tns = (pANTLR3_TREE_NODE_STREAM)(istream->super); + ctns = tns->ctns; + return tns->_LT(tns, 1); +} + + +// Default implementation is for parser and assumes a token stream as supplied by the runtime. +// You MAY need override this function if the standard BASE_TREE is not what you are using. +// +static void * +getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, + ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) +{ + pANTLR3_TREE_NODE_STREAM tns; + pANTLR3_COMMON_TREE_NODE_STREAM ctns; + pANTLR3_BASE_TREE node; + pANTLR3_BASE_TREE current; + pANTLR3_COMMON_TOKEN token; + pANTLR3_STRING text; + ANTLR3_INT32 i; + + // Dereference the standard pointers + // + tns = (pANTLR3_TREE_NODE_STREAM)(istream->super); + ctns = tns->ctns; + + // Create a new empty node, by stealing the current one, or the previous one if the current one is EOF + // + current = tns->_LT(tns, 1); + i = -1; + + if (current == &ctns->EOF_NODE.baseTree) + { + current = tns->_LT(tns, -1); + i--; + } + while (((pANTLR3_COMMON_TREE)(current->super))->factory == NULL) + { + current = tns->_LT(tns, i--); + } + + node = current->dupNode(current); + + // Find the newly dupicated token + // + token = node->getToken(node); + + // Create the token text that shows 
it has been inserted + // + token->setText8 (token, (pANTLR3_UINT8)"<missing "); + text = token->getText (token); + text->append8 (text, (const char *)recognizer->state->tokenNames[expectedTokenType]); + text->append8 (text, (const char *)">"); + + // Finally return the pointer to our new node + // + return node; +} +#ifdef ANTLR3_WINDOWS +#pragma warning (pop) +#endif + |