summaryrefslogtreecommitdiff
path: root/antlr/libantlr3c-3.4/include/antlr3lexer.h
diff options
context:
space:
mode:
authorCarlo Zancanaro <carlo@pc-4w14-0.cs.usyd.edu.au>2012-07-10 13:01:48 +1000
committerCarlo Zancanaro <carlo@pc-4w14-0.cs.usyd.edu.au>2012-07-10 13:01:48 +1000
commitf9fc35785b53aa097a09ab1b865d33497ee1802e (patch)
treea6c8ea8e913ceab2c08e9f7698332bff08681552 /antlr/libantlr3c-3.4/include/antlr3lexer.h
parentd11acd6d52351b35c102e9c18e32d38a11975c5b (diff)
Move antlr. Add `make test` to Makefile.
Diffstat (limited to 'antlr/libantlr3c-3.4/include/antlr3lexer.h')
-rw-r--r--antlr/libantlr3c-3.4/include/antlr3lexer.h193
1 files changed, 193 insertions, 0 deletions
diff --git a/antlr/libantlr3c-3.4/include/antlr3lexer.h b/antlr/libantlr3c-3.4/include/antlr3lexer.h
new file mode 100644
index 0000000..6cc10ef
--- /dev/null
+++ b/antlr/libantlr3c-3.4/include/antlr3lexer.h
@@ -0,0 +1,193 @@
+/** \file
+ * Base interface for any ANTLR3 lexer.
+ *
+ * An ANLTR3 lexer builds from two sets of components:
+ *
+ * - The runtime components that provide common functionality such as
+ * traversing character streams, building tokens for output and so on.
+ * - The generated rules and struutre of the actual lexer, which call upon the
+ * runtime components.
+ *
+ * A lexer class contains a character input stream, a base recognizer interface
+ * (which it will normally implement) and a token source interface (which it also
+ * implements. The Tokensource interface is called by a token consumer (such as
+ * a parser, but in theory it can be anything that wants a set of abstract
+ * tokens in place of a raw character stream.
+ *
+ * So then, we set up a lexer in a sequence akin to:
+ *
+ * - Create a character stream (something which implements ANTLR3_INPUT_STREAM)
+ * and initialize it.
+ * - Create a lexer interface and tell it where it its input stream is.
+ * This will cause the creation of a base recognizer class, which it will
+ * override with its own implementations of some methods. The lexer creator
+ * can also then in turn override anything it likes.
+ * - The lexer token source interface is then passed to some interface that
+ * knows how to use it, byte calling for a next token.
+ * - When a next token is called, let ze lexing begin.
+ *
+ */
+#ifndef _ANTLR3_LEXER
+#define _ANTLR3_LEXER
+
+// [The "BSD licence"]
+// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
+// http://www.temporal-wave.com
+// http://www.linkedin.com/in/jimidle
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+// 3. The name of the author may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+/* Definitions
+ */
+#define ANTLR3_STRING_TERMINATOR 0xFFFFFFFF
+
+#include <antlr3defs.h>
+#include <antlr3input.h>
+#include <antlr3commontoken.h>
+#include <antlr3tokenstream.h>
+#include <antlr3baserecognizer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ANTLR3_LEXER_struct
+{
+ /** If there is a super structure that is implementing the
+ * lexer, then a pointer to it can be stored here in case
+ * implementing functions are overridden by this super structure.
+ */
+ void * super;
+
+ /** A generated lexer has an mTokens() function, which needs
+ * the context pointer of the generated lexer, not the base lexer interface
+ * this is stored here and initialized by the generated code (or manually
+ * if this is a manually built lexer.
+ */
+ void * ctx;
+
+ /** A pointer to the character stream whence this lexer is receiving
+ * characters.
+ * TODO: I may come back to this and implement charstream outside
+ * the input stream as per the java implementation.
+ */
+ pANTLR3_INPUT_STREAM input;
+
+ /** Pointer to the implementation of a base recognizer, which the lexer
+ * creates and then overrides with its own lexer oriented functions (the
+ * default implementation is parser oriented). This also contains a
+ * token source interface, which the lexer instance will provide to anything
+ * that needs it, which is anything else that implements a base recognizer,
+ * such as a parser.
+ */
+ pANTLR3_BASE_RECOGNIZER rec;
+
+ /** Pointer to a function that sets the charstream source for the lexer and
+ * causes it to be reset.
+ */
+ void (*setCharStream) (struct ANTLR3_LEXER_struct * lexer, pANTLR3_INPUT_STREAM input);
+
+ /** Pointer to a function that switches the current character input stream to
+ * a new one, saving the old one, which we will revert to at the end of this
+ * new one.
+ */
+ void (*pushCharStream) (struct ANTLR3_LEXER_struct * lexer, pANTLR3_INPUT_STREAM input);
+
+ /** Pointer to a function that abandons the current input stream, whether it
+ * is empty or not and reverts to the previous stacked input stream.
+ */
+ void (*popCharStream) (struct ANTLR3_LEXER_struct * lexer);
+
+ /** Pointer to a function that emits the supplied token as the next token in
+ * the stream.
+ */
+ void (*emitNew) (struct ANTLR3_LEXER_struct * lexer, pANTLR3_COMMON_TOKEN token);
+
+ /** Pointer to a function that constructs a new token from the lexer stored information
+ */
+ pANTLR3_COMMON_TOKEN (*emit) (struct ANTLR3_LEXER_struct * lexer);
+
+ /** Pointer to the user provided (either manually or through code generation
+ * function that causes the lexer rules to run the lexing rules and produce
+ * the next token if there iss one. This is called from nextToken() in the
+ * pANTLR3_TOKEN_SOURCE. Note that the input parameter for this funciton is
+ * the generated lexer context (stored in ctx in this interface) it is a generated
+ * function and expects the context to be the generated lexer.
+ */
+ void (*mTokens) (void * ctx);
+
+ /** Pointer to a function that attempts to match and consume the specified string from the input
+ * stream. Note that strings muse be passed as terminated arrays of ANTLR3_UCHAR. Strings are terminated
+ * with 0xFFFFFFFF, which is an invalid UTF32 character
+ */
+ ANTLR3_BOOLEAN (*matchs) (struct ANTLR3_LEXER_struct * lexer, ANTLR3_UCHAR * string);
+
+ /** Pointer to a function that matches and consumes the specified character from the input stream.
+ * The input stream is required to provide characters via LA() as UTF32 characters. The default lexer
+ * implementation is source encoding agnostic and so input streams do not generally need to
+ * override the default implmentation.
+ */
+ ANTLR3_BOOLEAN (*matchc) (struct ANTLR3_LEXER_struct * lexer, ANTLR3_UCHAR c);
+
+ /** Pointer to a function that matches any character in the supplied range (I suppose it could be a token range too
+ * but this would only be useful if the tokens were in tsome guaranteed order which is
+ * only going to happen with a hand crafted token set).
+ */
+ ANTLR3_BOOLEAN (*matchRange) (struct ANTLR3_LEXER_struct * lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
+
+ /** Pointer to a function that matches the next token/char in the input stream
+ * regardless of what it actaully is.
+ */
+ void (*matchAny) (struct ANTLR3_LEXER_struct * lexer);
+
+ /** Pointer to a function that recovers from an error found in the input stream.
+ * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also
+ * be from a mismatched token that the (*match)() could not recover from.
+ */
+ void (*recover) (struct ANTLR3_LEXER_struct * lexer);
+
+ /** Pointer to function to return the current line number in the input stream
+ */
+ ANTLR3_UINT32 (*getLine) (struct ANTLR3_LEXER_struct * lexer);
+ ANTLR3_MARKER (*getCharIndex) (struct ANTLR3_LEXER_struct * lexer);
+ ANTLR3_UINT32 (*getCharPositionInLine)(struct ANTLR3_LEXER_struct * lexer);
+
+ /** Pointer to function to return the text so far for the current token being generated
+ */
+ pANTLR3_STRING (*getText) (struct ANTLR3_LEXER_struct * lexer);
+
+
+ /** Pointer to a function that knows how to free the resources of a lexer
+ */
+ void (*free) (struct ANTLR3_LEXER_struct * lexer);
+
+}
+ ANTLR3_LEXER;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif