%{
/**
 * Lexer for Doctor J (Java code analyzer).
 *   author: Jeffrey E. Pace (jpace@erols.com)
 */

#ifndef AST_h
#include "AST.h"
#endif

#include "grammar.h"

#ifndef File_h
#include "File.h"
#endif

#ifndef Parser_h
#include "Parser.h"
#endif

#ifndef doctorj_stdio_h
#define doctorj_stdio_h
#include <stdio.h>
#endif

#ifndef doctorj_ctype_h
#define doctorj_ctype_h
#include <ctype.h>
#endif

#ifndef doctorj_string
#define doctorj_string
#include <string>
#endif

using namespace doctorj;

extern Parser parser;

/**
 * Set to true once the "'assert' is a keyword" warning has been reported, so
 * that the scanner issues that warning at most once.
 */
static bool warnedAboutAssert(false);

/*
 * Replace the scanner's input routine: the buffer is refilled by asking the
 * file the parser is currently processing for more input.
 *
 *   buf      - destination buffer supplied by the scanner
 *   result   - lvalue that receives whatever getInput() returns
 *   max_size - passed through to getInput() unchanged
 *
 * The body is wrapped in do/while(0) and carries no trailing semicolon, so
 * the macro expands to exactly one statement and remains valid when used as
 * the unbraced branch of an if/else. The arguments are parenthesised so that
 * arbitrary expressions can be passed.
 */
#ifdef YY_INPUT
#undef YY_INPUT
#endif

#define YY_INPUT(buf, result, max_size) \
    do { \
        (result) = parser.currentFile()->getInput((buf), (max_size)); \
    } while (0)

/*
 * Rule action for tokens that keep their matched text.
 *
 * Passes the token name (as a string) to parser.display(), allocates a
 * classType leaf from the matched text (yytext), the parser's current
 * position and the current file, hands the leaf and the match length to
 * parser.reset(), stores the leaf in yylval.n_leaf, and returns tokenType
 * from the enclosing function.
 */
#define make_value(classType, tokenType) {                                \
    parser.display(#tokenType);                                           \
    doctorj::AstLeaf* node =                                              \
        new classType(yytext, parser.position(), parser.currentFile());   \
    parser.reset(node, yyleng);                                           \
    yylval.n_leaf = node;                                                 \
    return tokenType;                                                     \
}

/*
 * Rule action for tokens whose text is fixed (keywords, operators,
 * punctuation), so the matched text is not passed to the leaf.
 *
 * Passes the token name (as a string) to parser.display(), allocates a
 * classType leaf from the parser's current position and the current file,
 * hands the leaf and the match length to parser.reset(), stores the leaf in
 * yylval.n_leaf, and returns tokenType from the enclosing function.
 */
#define make_token(classType, tokenType) {                                \
    parser.display(#tokenType);                                           \
    doctorj::AstLeaf* node =                                              \
        new classType(parser.position(), parser.currentFile());           \
    parser.reset(node, yyleng);                                           \
    yylval.n_leaf = node;                                                 \
    return tokenType;                                                     \
}

%}

/* Exclusive start condition that is active while inside a block comment. */
%x in_comment

/*
 * Table-size declarations in the traditional lex form: %e parse-tree nodes,
 * %n states, %p positions.
 */
%e 1600
%n 800
%p 5000

/* Basic character classes used to build the token definitions. */
HexDigit        [0-9a-fA-F]
Digit           [0-9]
OctalDigit      [0-7]
/* Leading digit of a three-digit octal escape (see OctEscape3). */
TetraDigit      [0-3]
NonZeroDigit    [1-9]
Letter          [a-zA-Z_]
/*
 * NOTE(review): as written this class appears to contain the character with
 * octal code 1 plus the letter 'b' (already covered by Letter); the intended
 * character set is not evident from this file -- confirm.
 */
UniEsc          [\1b]

/*
 * Octal escapes used inside character and string literals: a backslash
 * followed by one, two, or three octal digits. A three-digit escape must
 * begin with a digit in the range 0-3.
 */
OctEscape1      [\\]{OctalDigit}
OctEscape2      [\\]{OctalDigit}{OctalDigit}
OctEscape3      [\\]{TetraDigit}{OctalDigit}{OctalDigit}
OctEscape       ({OctEscape1}|{OctEscape2}|{OctEscape3})

/**
 * Unicode escape, written in ASCII as a backslash, the marker 'u', and four
 * hex digits. The Java language allows the 'u' marker to be repeated, so one
 * or more are accepted here.
 */
UniCodeChar     [\\][u]+{HexDigit}{HexDigit}{HexDigit}{HexDigit}

/* Single-character escapes: backslash followed by one of r n b f t, a
   backslash, a single quote, or a double quote. */
Escape          [\\]([r]|[n]|[b]|[f]|[t]|[\\]|[\']|[\"])
/* First character of an identifier: dollar sign, Letter, or UniEsc. */
ULetter         (\$|{Letter}|{UniEsc})
Identifier      {ULetter}({ULetter}|{Digit})*

/*
 * Integer literals: decimal, octal, or hexadecimal, each with an optional
 * long suffix. A lone "0" is matched by OctalNum.
 */
IntSuffix       [lL]
DecimalNum      {NonZeroDigit}{Digit}*{IntSuffix}?
OctalNum        [0]{OctalDigit}*{IntSuffix}?
HexNum          [0][xX]{HexDigit}+{IntSuffix}?
AstIntegerLiteral  ({DecimalNum}|{OctalNum}|{HexNum})

/*
 * Floating-point literals, in four shapes:
 *   Float1  digits '.' optional-digits optional-exponent optional-suffix
 *   Float2  '.' digits optional-exponent optional-suffix
 *   Float3  digits exponent optional-suffix
 *   Float4  digits suffix
 *
 * An exponent is the indicator (e or E) followed by an optionally signed
 * run of digits. The digits are mandatory: an indicator with nothing after
 * it is not part of a floating-point literal.
 */
Sign            [+-]
FltDblSuffix    [fFdD]
SignedInt       {Sign}?{Digit}+
Expo            [eE]
ExponentPart    {Expo}{SignedInt}
Float1          {Digit}+[\.]{Digit}*{ExponentPart}?{FltDblSuffix}?
Float2          [\.]{Digit}+{ExponentPart}?{FltDblSuffix}?
Float3          {Digit}+{ExponentPart}{FltDblSuffix}?
Float4          {Digit}+{FltDblSuffix}
FloatingPoint   ({Float1}|{Float2}|{Float3}|{Float4})

/*
 * One unescaped element of a character literal: any character other than a
 * backslash or a single quote (the quote is written escaped inside the
 * class), or a Unicode escape. Newlines are not excluded.
 */
AnyChrChr       ([^\'\\]|{UniCodeChar})
/*
 * One unescaped element of a string literal: any character other than a
 * backslash or a double quote (the quote is written escaped inside the
 * class), or a Unicode escape. Newlines are not excluded.
 */
AnyStrChr       ([^\"\\]|{UniCodeChar})

/* The newline character. */
CR              \n

/**
 * Character literal: exactly one element between single quotes.
 */
Character       \'({AnyChrChr}|{Escape}|{OctEscape})\'

/**
 * String literal: zero or more elements between double quotes; escaped
 * characters are accepted through Escape and OctEscape.
 */
String          \"({AnyStrChr}|{Escape}|{OctEscape})*\"

%%

"/*" {
    /* A block comment begins: switch to the exclusive comment state. */
    BEGIN(in_comment);
    parser.incrementPosition(yyleng);
}

<in_comment>"*"+"/" {
    /* One or more stars followed by a slash: the comment is finished. */
    BEGIN(INITIAL);
    parser.incrementPosition(yyleng);
}

<in_comment>[^\*\n]* {
    /* Consume a run of characters that are neither a star nor a newline. */
    parser.incrementPosition(yyleng);
}

<in_comment>"*"+[^*/\n]* {
    /* Consume stars that are not followed by a slash, plus what follows. */
    parser.incrementPosition(yyleng);
}

<in_comment>\n {
    /* Consume the end of a line inside the comment. */
    parser.incrementPosition(yyleng);
}

<in_comment><<EOF>> {
    /* The input ended inside an unterminated comment. The start condition
       is not reset when the scanner is restarted on new input, so return to
       INITIAL here; otherwise the next input would be scanned as comment
       text. */
    BEGIN(INITIAL);
    yyterminate();
}

"//".* {
    /* One-line comment: everything up to, but not including, the newline. */
    parser.incrementPosition(yyleng);
}

"true"            { make_token(AstTrueLiteral,  BOOLEAN_LITERAL); }
"false"           { make_token(AstFalseLiteral, BOOLEAN_LITERAL); }

{DecimalNum}      { make_value(AstIntegerLiteral, INTEGER_LITERAL); }
{OctalNum}        { make_value(AstIntegerLiteral, INTEGER_LITERAL); }
{HexNum}          { make_value(AstIntegerLiteral, INTEGER_LITERAL); }

{Float1}          { make_value(AstFloatingPointLiteral, FLOATING_POINT_LITERAL); }
{Float2}          { make_value(AstFloatingPointLiteral, FLOATING_POINT_LITERAL); }
{Float3}          { make_value(AstFloatingPointLiteral, FLOATING_POINT_LITERAL); }
{Float4}          { make_value(AstFloatingPointLiteral, FLOATING_POINT_LITERAL); }

{Character}       { make_value(AstCharacterLiteral, CHARACTER_LITERAL); }
{String}          { make_value(AstStringLiteral,    STRING_LITERAL); }

";"               { make_token(AstSemicolon, SEMICOLON); }

"="               { make_token(AstEq, EQ); }
">"               { make_token(AstGt, GT); }
"<"               { make_token(AstLt, LT); }
"!"               { make_token(AstNot, NOT); }
"~"               { make_token(AstComp, COMP); }
"?"               { make_token(AstQuestion, QUESTION); }
":"               { make_token(AstColon, COLON); }
"=="              { make_token(AstEqeq, EQEQ); }
"<="              { make_token(AstLteq, LTEQ); }
">="              { make_token(AstGteq, GTEQ); }
"!="              { make_token(AstNoteq, NOTEQ); }
"&&"              { make_token(AstAndand, ANDAND); }
"||"              { make_token(AstOror, OROR); }
"++"              { make_token(AstPlusplus, PLUSPLUS); }
"--"              { make_token(AstMinusminus, MINUSMINUS); }
"+"               { make_token(AstPlus, PLUS); }
"-"               { make_token(AstMinus, MINUS); }
"*"               { make_token(AstMult, MULT); }
"/"               { make_token(AstDiv, DIV); }
"&"               { make_token(AstAnd, AND); }
"|"               { make_token(AstOr, OR); }
"^"               { make_token(AstXor, XOR); }
"%"               { make_token(AstMod, MOD); }
"<<"              { make_token(AstLshift, LSHIFT); }
">>"              { make_token(AstRshift, RSHIFT); }
">>>"             { make_token(AstUrshift, URSHIFT); }
"+="              { make_token(AstPluseq, PLUSEQ); }
"-="              { make_token(AstMinuseq, MINUSEQ); }
"*="              { make_token(AstMulteq, MULTEQ); }
"/="              { make_token(AstDiveq, DIVEQ); }
"&="              { make_token(AstAndeq, ANDEQ); }
"|="              { make_token(AstOreq, OREQ); }
"^="              { make_token(AstXoreq, XOREQ); }
"%="              { make_token(AstModeq, MODEQ); }
"<<="             { make_token(AstLshifteq, LSHIFTEQ); }
">>="             { make_token(AstRshifteq, RSHIFTEQ); }
">>>="            { make_token(AstUrshifteq, URSHIFTEQ); }

"abstract"        { make_token(AstAbstract, ABSTRACT); }
"assert"          { 
    /* If this was processed with the --source 1.4 option, then this is an
       assert statement, since "assert" is a keyword in Java 1.4.

       If this was processed without the 1.4 source option, or if there
       is something following this that looks like an expression.

       For example:
           assert false;
           assert p != null;
           assert i > 4;
           
       But not:
           assert(false);
           assert(p != null);
           assert(i = 4);

       Since those look like method invocations.
    */
    
    if (parser.javaVersion() >= 1.4) {
        make_token(AstAssert, ASSERT);
    }
    else {
        // We could deduce what type of code is being processed, but we'll just
        // assume that it is 1.3, without assert statements. But at least we'll
        // warn them about collisions with reserved words.
        if (!warnedAboutAssert) {
            parser.reportWarning("'assert' is a keyword as of Java 1.4");
            warnedAboutAssert = true;
        }
        make_value(AstIdentifier, IDENTIFIER);
    }
}
"boolean"         { make_token(AstBoolean, BOOLEAN); }
"break"           { make_token(AstBreak, BREAK); }
"byte"            { make_token(AstByte, BYTE); }
"case"            { make_token(AstCase, CASE); }
"catch"           { make_token(AstCatch, CATCH); }
"char"            { make_token(AstChar, CHAR); }
"class"           { make_token(AstClass, CLASS); }
"const"           { make_token(AstConst, CONST); }
"continue"        { make_token(AstContinue, CONTINUE); }
"default"         { make_token(AstDefault, DEFAULT); }
"do"              { make_token(AstDo, DO); }
"double"          { make_token(AstDouble, DOUBLE); }
"else"            { make_token(AstElse, ELSE); }
"extends"         { make_token(AstExtends, EXTENDS); }
"final"           { make_token(AstFinal, FINAL); }
"finally"         { make_token(AstFinally, FINALLY); }
"float"           { make_token(AstFloat, FLOAT); }
"for"             { make_token(AstFor, FOR); }
"goto"            { make_token(AstGoto, GOTO); }
"if"              { make_token(AstIf, IF); }
"implements"      { make_token(AstImplements, IMPLEMENTS); }
"import"          { make_token(AstImport, IMPORT); }
"instanceof"      { make_token(AstInstanceof, INSTANCEOF); }
"int"             { make_token(AstInt, INT); }
"interface"       { make_token(AstInterface, INTERFACE); }
"long"            { make_token(AstLong, LONG); }
"native"          { make_token(AstNative, NATIVE); }
"new"             { make_token(AstNew, NEW); }
"null"            { make_token(AstNullLiteral, NULL_LITERAL); }
"package"         { make_token(AstPackage, PACKAGE_); }
"private"         { make_token(AstPrivate, PRIVATE); }
"protected"       { make_token(AstProtected, PROTECTED); }
"public"          { make_token(AstPublic, PUBLIC); }
"return"          { make_token(AstReturn, RETURN); }
"short"           { make_token(AstShort, SHORT); }
"static"          { make_token(AstStatic, STATIC); }
"strictfp"        { make_token(AstStrictfp, STRICTFP); }
"super"           { make_token(AstSuper, SUPER); }
"switch"          { make_token(AstSwitch, SWITCH); }
"synchronized"    { make_token(AstSynchronized, SYNCHRONIZED); }
"this"            { make_token(AstThis, THIS); }
"throw"           { make_token(AstThrow, THROW); }
"throws"          { make_token(AstThrows, THROWS); }
"transient"       { make_token(AstTransient, TRANSIENT); }
"try"             { make_token(AstTry, TRY); }
"void"            { make_token(AstVoid, VOID); }
"volatile"        { make_token(AstVolatile, VOLATILE); }
"while"           { make_token(AstWhile, WHILE); }

{Identifier}      { make_value(AstIdentifier, IDENTIFIER); }

"\{"              { make_token(AstLbrace, LBRACE); }
"\}"              { make_token(AstRbrace, RBRACE); }
"\["              { make_token(AstLbracket, LBRACKET); }
"\]"              { make_token(AstRbracket, RBRACKET); }

"\."              { make_token(AstDot, DOT); }
"\("              { make_token(AstLparen, LPAREN); }
"\)"              { make_token(AstRparen, RPAREN); }
","               { make_token(AstComma, COMMA); }

{CR}              { parser.incrementPosition(yyleng); }
[\f]              { parser.incrementPosition(yyleng); }
[\t]+             { parser.incrementPosition(yyleng); }
^[ ]*             { parser.incrementPosition(yyleng); }
[\b]              { parser.incrementPosition(yyleng); }
[ ]+              { parser.incrementPosition(yyleng); }
