/*
**      html_lex.c      -
**
**
** Copyright (c) 1995  Hughes Technologies Pty Ltd
**
** Permission to use, copy, and distribute for non-commercial purposes,
** is hereby granted without fee, providing that the above copyright
** notice appear in all copies and that both the copyright notice and this
** permission notice appear in supporting documentation.
**
** This software is provided "as is" without any expressed or implied warranty.
**
**
*/

/*
**      This is a scanner for the mSQL tags embedded in html
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <msql.h>

#ifdef OS2
#  include <common/config.h>
#  include <common/portable.h>
#  include "w3-msql.h"
#  include "w3_yacc.h"
#else
#  include "portable.h"
#  include "w3-msql.h"
#  include "y_tab.h"
#endif


#define REG             register        /* storage-class shorthand for locals */
#define NUM_HASH        16              /* findKeyword() masks with NUM_HASH-1,
                                           so this must be a power of two */

/* Stream the scanner reads from; assigned by initScanner() */
FILE                    *lexFp;

/* Push-back stack: yyUnget() pushes onto it, yyGet()/yySkip() pop from it */
static  u_char          unGetBuf[1024];

/* Characters of the token currently being assembled */
static  u_char          tokenBuf[1024];

/* Next free slot in unGetBuf (equal to unGetBuf when nothing is pushed back) */
static  u_char          *unGetMark;

/* Next free slot in tokenBuf */
static  u_char          *tokenMark;

/* Text of the most recent token (NULL while a token is being scanned) */
u_char                  *yytext = NULL;

/* Number of characters gathered for the current token */
u_int                   yytoklen = 0;

/* Current input line; adjusted whenever a newline is read or pushed back */
int                     yylineno = 1;

/* Token cursor and token start; yylex() points tokStart at tokenBuf */
static  u_char          *tokPtr;
static  u_char          *tokStart;

/* Current state of the yylex() state machine */
static  int             state = 0;

/* Characters consumed so far, and the value it had when yylex() was entered */
static  int             charCount;
static  int             origCharCount;

/* Semantic value of the most recent token */
YYSTYPE                 yylval;




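/*
** Macro forms of the primitives that handle the scanner's internal
** pointers.  These definitions are commented out; the functions of the
** same names defined later in this file are used instead.
#define yyGet()         (*tokPtr++); yytoklen++
#define yyUnget()       tokPtr--; yytoklen--
#define yySkip()        (*tokPtr++); tokStart++
#define yyRevert()      {tokPtr=tokStart; yytoklen=0;}
#define yyUpdate()      {tokStart=tokPtr; yytoklen=0;}
**
*/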

/*
** Return t from the enclosing function.  Written as do/while(0) so that
** "yyReturn(x);" expands to exactly one statement.
*/
#define yyReturn(t)     do { return (t); } while (0)



/*
** Macros for matching character classes.  These are in addition to
** those provided in <ctype.h>.
**
** iswhite(c) is non-zero when c is a blank, a tab or a newline (and, when
** built for OS/2, a carriage return).  The argument is parenthesised so
** that expression arguments group correctly; it is still evaluated more
** than once, so it must not have side effects.
*/
#ifdef  iswhite
# undef iswhite
#endif
#ifdef OS2
#  define iswhite(c)      ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
#else
#  define iswhite(c)      ((c) == ' ' || (c) == '\t' || (c) == '\n')
#endif



/*
** Debugging macros.
**
** LEX_DEBUG   : token(x) yields the spelling of x as a string (cast to
**               int) instead of its numeric value.  The ISO '#' operator
**               is used because an ISO preprocessor does not substitute
**               macro parameters inside string literals.
**               NOTE(review): the (int) cast assumes a pointer fits in an
**               int - confirm before enabling on a 64-bit target.
** DEBUG_STATE : CASE(x) prints every state transition taken by yylex().
*/

/* #define DEBUG_STATE */  /* Define this to watch the state transitions */

#ifdef LEX_DEBUG
#  define token(x)      (int) #x
#else
#  define token(x)      x
#endif /* LEX_DEBUG */

#ifdef DEBUG_STATE
#  define CASE(x)       case x: if (x) printf("%c -> state %d\n",c,x); \
                                else printf("Scanner starting at state 0\n");
#else
#  define CASE(x)       case x:
#endif



/*
** One entry of the keyword table: a keyword spelling and the token value
** reported for it.  An entry whose name is a null pointer ends a bucket.
*/
struct symtab_s {
        char    *name;          /* keyword text, lower case in the table */
        int     tok;            /* token value for this keyword */
};
typedef struct symtab_s symtab_t;


static symtab_t symtab[16][16] = {
        { /* 0 */
                { 0,            0}
        },
        { /* 1 */
                { 0,            0}
        },
        { /* 2 */
                { "free",       token(FREE)},
                { 0,            0}
        },
        { /* 3 */
                { 0,            0}
        },
        { /* 4 */
                { 0,            0}
        },
        { /* 5 */
                { "database",   token(DATABASE)},
                { "seek",       token(SEEK)},
                { 0,            0}
        },
        { /* 6 */
                { "close",      token(CLOSE)},
                { "query",      token(QUERY)},
                { 0,            0}
        },
        { /* 7 */
                { "print_rows", token(PRINT_ROWS)},
                { 0,            0}
        },
        { /* 8 */
                { 0,            0}
        },
        { /* 9 */
                { "else",       token(ELSE)},
                { 0,            0}
        },
        { /* 10 */
                { "connect",    token(CONNECT)},
                { 0,            0}
        },
        { /* 11 */
                { 0,            0}
        },
        { /* 12 */
                { "set",        token(SET)},
                { 0,            0}
        },
        { /* 13 */
                { "print",      token(PRINT)},
                { 0,            0}
        },
        { /* 14 */
                { "translate",  token(TRANSLATE)},
                { 0,            0}
        },
        { /* 15 */
                { "if",         token(IF)},
                { "fi",         token(FI)},
                { 0,            0}
        }
};



u_char yyGet()
{
        u_char  c;

        yytoklen++;
        charCount++;
        if (unGetMark > unGetBuf)
        {
                unGetMark--;
                c = *unGetMark;
        }
        else
        {
                c = fgetc(lexFp);
                if (feof(lexFp))
                        c = 0;
        }
        *tokenMark++ = c;
        if (c == '\n')
                yylineno++;
        return(c);
}


void yyUnget()
{
        yytoklen--;
        charCount--;
        *unGetMark++ = *--tokenMark;
        if (*tokenMark == '\n')
                yylineno--;
}



u_char yySkip()
{
        u_char  c;

        charCount++;
        if (unGetMark > unGetBuf)
        {
                unGetMark--;
                c = *unGetMark;
        }
        else
        {
                c = fgetc(lexFp);
                if (feof(lexFp))
                        c = 0;
        }
        tokenMark--;
        * tokenMark++ = c;
        if (c == '\n')
                yylineno++;
        return(c);
}


/*
** yyRevert - push the whole of the current token back onto the input.
**
** Every character counted by yytoklen is handed to yyUnget(), leaving
** yytoklen at 0.  charCount is then restored to the value recorded in
** origCharCount.  The restore is done after the loop because yyUnget()
** decrements charCount itself; restoring first would leave charCount
** below origCharCount.
*/
void yyRevert()
{
        while(yytoklen)
        {
                yyUnget();
        }
        charCount = origCharCount;
}


/*
** yyUpdate - discard the current token and start a fresh one.
**
** The token buffer is emptied and the token length and both character
** counters are reset to zero.
*/
void yyUpdate()
{
        tokenMark = tokenBuf;
        origCharCount = 0;
        charCount = 0;
        yytoklen = 0;
}



/*
** initScanner - prepare the scanner to read from file descriptor fd.
**
** Resets the state machine, the line counter, the character counter and
** the token / push-back buffers, then wraps fd in a stdio stream (binary
** mode on OS/2) and stores it in lexFp.
**
** Returns 0 on success and -1 when fdopen() fails, in which case lexFp is
** NULL.  The return type was previously an implicit int with no defined
** value, so callers that ignore the result are unaffected.
*/
int initScanner(fd)
        int     fd;
{
        state = 0;
        yylineno = 1;
        charCount = 0;
        tokenMark = tokenBuf;
        unGetMark = unGetBuf;
#ifdef OS2
        lexFp = fdopen(fd,"rb");
#else
        lexFp = fdopen(fd,"r");
#endif
        if (lexFp == NULL)
        {
                return(-1);
        }
        return(0);
}


int findKeyword(tok,len)
        char    *tok;
        int     len;
{
        REG     char    *cp1,
                        *cp2,
                        tmp;
        REG     symtab_t *stab;
        int     found;
        REG     int     hash=0,
                        index=0;


        cp1 = tok;
        while(*cp1 && index++ < len)
        {
                hash += *cp1++;
        }
        hash = hash & (NUM_HASH - 1);

        stab = symtab[hash];
        while(stab->name)
        {
                cp1 = stab->name;
                cp2 = tok;
                found = 1;
                while(cp2 - tok < len)
                {
                        if (!(*cp1))
                        {
                                found = 0;
                                break;
                        }
                        tmp = *cp2++;
                        if (tmp >64 && tmp<91)
                                tmp+=32;
                        if (tmp != *cp1++)
                        {
                                found = 0;
                                break;
                        }
                }
                if (*cp1)
                {
                        found = 0;
                }
                if (found)
                {
                        yytext = (u_char *)stab->name;
                        yylval = (YYSTYPE)stab->tok;
                        return(stab->tok);
                }
                stab++;
        }
        return(0);
}

/*
** tokenDup - make a NUL-terminated heap copy of a token.
**
** Copies the first len bytes at tok into freshly allocated memory and
** appends a terminating 0.  The caller owns the result and releases it
** with free().
**
** Returns NULL when tok is NULL, when len is negative, or when the
** allocation fails.
*/
u_char *tokenDup(tok,len)
        u_char  *tok;
        int     len;
{
        u_char  *copy;

        if (tok == NULL || len < 0)
        {
                return(NULL);
        }
        copy = (u_char *)malloc((size_t)len + 1);
        if (copy == NULL)
        {
                return(NULL);
        }
        memcpy(copy, tok, (size_t)len);
        copy[len] = 0;
        return(copy);
}


u_char *readTextLiteral(tok)
        u_char  *tok;
{
        REG     u_char c;
        int     bail;

        bail = 0;
        while(!bail)
        {
                c = yyGet();
                switch(c)
                {
                        case 0:
                                return(NULL);

                        case '\\':
                                c = yyGet();
                                if (!c)
                                        return(NULL);
                                break;

                        case '"':
                                bail=1;
                                break;
                }
        }
        return(tokenDup(tok,yytoklen));
}


int yylex()
{
        REG     u_char  c = 0;
        int     tokval;
        static  ifState = 0,
                ifDepth = 0;
        static  u_char dummyBuf[2];


        /*
        ** Handle the end of input.  We return an EOI token when we hit
        ** the end and then return a 0 on the next call to yylex.  This
        ** allows the parser to do the right thing with trailing garbage
        ** in the expression.
        */
        if (state == 1000)
        {
                return(0);
        }

        /*
        ** Dive into the state machine
        */
        tokPtr = tokStart;
        yytext = NULL;
        yytoklen = 0;
        tokenMark = tokenBuf;
        tokStart = tokenBuf;
        origCharCount = charCount;


        while(1)
        {
                switch(state)
                {
                        /* State 0 : Start of token */
                        CASE(0)
                                tokPtr = tokStart;
                                yytext = NULL;
                                yytoklen = 0;
                                c = yyGet();
                                if (c == 0)
                                {
                                        state = 1000;
                                        break;
                                }
                                if (c == '<')
                                {
                                        state = 1;
                                        continue;
                                }
                                if (checkIf())
                                        printf("%c",c);
                                yyUpdate();
                                break;

                        /* State 1 : Look for mSQL tag */
                        CASE(1)
                                c = yyGet();
                                if (c != '!')
                                {
                                        state = 998;
                                        continue;
                                }

                                c = yyGet();
                                while (iswhite(c))
                                {
                                        c = yyGet();
                                }

                                if (toupper(c) != 'M')
                                {
                                        state = 998;
                                        continue;
                                }
                                c = yyGet();
                                if (toupper(c) != 'S')
                                {
                                        state = 998;
                                        continue;
                                }
                                c = yyGet();
                                if (toupper(c) != 'Q')
                                {
                                        state = 998;
                                        continue;
                                }
                                c = yyGet();
                                if (toupper(c) != 'L')
                                {
                                        state = 998;
                                        continue;
                                }
                                c = yyGet();
                                if (!iswhite(c))
                                {
                                        state = 998;
                                        continue;
                                }
                                yyUnget();
                                yyUpdate();
                                state = 2;
                                break;


                        /* State 2 : mSQL tag open */
                        CASE(2)
                                c = yyGet();
                                while (iswhite(c))
                                {
                                        c = yySkip();
                                }
                                if (isalpha(c))
                                {
                                        state = 3;
                                        break;
                                }
                                if (isdigit(c))
                                {
                                        state = 6;
                                        break;
                                }
                                if (c == '"')
                                {
                                        state = 4;
                                        break;
                                }
                                if (c == '\'')
                                {
                                        state = 13;
                                        break;
                                }
                                if (c == '$')
                                {
                                        state = 5;
                                        break;
                                }
                                if (c == '>')
                                {
                                        state = 7;
                                        break;
                                }
                                if (c == '<')
                                {
                                        state = 8;
                                        break;
                                }
                                if (c == '=')
                                {
                                        state = 9;
                                        break;
                                }
                                if (c == '|')
                                {
                                        state = 10;
                                        break;
                                }
                                if (c == '&')
                                {
                                        state = 11;
                                        break;
                                }
                                if (c == '!')
                                {
                                        state = 12;
                                        break;
                                }

                                state = 999;
                                break;

                        /* State 3 : Word */
                        CASE(3)
                                c = yyGet();
                                if (!iswhite(c) && c != '>' && c!='.')
                                {
                                        state = 3;
                                        break;
                                }

                                yyUnget();
                                tokval = findKeyword(tokStart, yytoklen);
                                if (tokval)
                                {
#ifdef LEX_DEBUG
                                        if (strcmp(tokval,"IF") == 0)
#else
                                        if (tokval == IF)
#endif
                                        {
                                                ifState = 1;
                                        }
                                        state = 2;
                                        yyReturn(tokval);
                                }
                                else
                                {
                                        state = 2;
                                        yytext = tokenDup(tokStart,yytoklen);
                                        yylval = (YYSTYPE) yytext;
                                        yyReturn(token(IDENT));
                                }
                                break;

                        /* Text literal */
                        CASE(4)
                                yytext = readTextLiteral(tokStart);
                                yylval = (YYSTYPE) yytext;
                                if (yytext)
                                {
                                        state = 2;
                                        yyReturn(token(TEXT));
                                }
                                state = 999;
                                break;


                        /* Variable */
                        CASE(5)
                                c = yyGet();
                                while (isalnum(c) || c == '_' || c== '-')
                                {
                                        c = yyGet();
                                }
                                yyUnget();
                                state = 2;
                                yytext = tokenDup(tokStart,yytoklen);
                                yylval = (YYSTYPE) yytext;
                                yyReturn(token(VAR));
                                break;

                        /* Number */
                        CASE(6)
                                c = yyGet();
                                if (isalpha(c))
                                {
                                        state=999;
                                        break;
                                }

                                if (isdigit(c))
                                {
                                        break;
                                }

                                yyUnget();
                                state = 2;
                                yytext = tokenDup(tokStart,yytoklen);
                                yylval = (YYSTYPE) yytext;
                                yyReturn(token(NUM));
                                break;

                        /* Cond_op  > or TAG_CLOSE */
                        CASE(7)
                                c = yyGet();
                                if (c == '=')
                                {
                                        state = 2;
                                        yytext = tokenDup(tokStart,yytoklen);
                                        yylval = (YYSTYPE) yytext;
                                        yyReturn(token(GE));
                                        break;
                                }
                                yyUnget();
                                state = 2;
                                yytext = tokenDup(tokStart,yytoklen);
                                yylval = (YYSTYPE) yytext;
                                if (ifState)
                                {
                                        yyReturn(token(GT));
                                }
                                else
                                {
                                        state = 0;
                                        yyReturn(token(TAG_CLOSE));
                                }
                                break;

                        /* Cond_op  < */
                        CASE(8)
                                c = yyGet();
                                if (c == '=')
                                {
                                        state = 2;
                                        yytext = tokenDup(tokStart,yytoklen);
                                        yylval = (YYSTYPE) yytext;
                                        yyReturn(token(LE));
                                        break;
                                }
                                yyUnget();
                                state = 2;
                                yytext = tokenDup(tokStart,yytoklen);
                                yylval = (YYSTYPE) yytext;
                                yyReturn(token(LT));
                                break;

                        /* Cond_op  = */
                        CASE(9)
                                c = yyGet();
                                if (c == '=')
                                {
                                        state = 2;
                                        yytext = tokenDup(tokStart,yytoklen);
                                        yylval = (YYSTYPE) yytext;
                                        yyReturn(token(EQ));
                                        break;
                                }
                                state = 999;
                                break;

                        /* bool_op  || */
                        CASE(10)
                                c = yyGet();
                                if (c == '|')
                                {
                                        state = 2;
                                        yytext = tokenDup(tokStart,yytoklen);
                                        yylval = (YYSTYPE) yytext;
                                        yyReturn(token(LOGICAL_OR));
                                        break;
                                }
                                state = 999;
                                break;

                        /* bool_op  && */
                        CASE(11)
                                c = yyGet();
                                if (c == '&')
                                {
                                        state = 2;
                                        yytext = tokenDup(tokStart,yytoklen);
                                        yylval = (YYSTYPE) yytext;
                                        yyReturn(token(LOGICAL_AND));
                                        break;
                                }
                                state = 999;
                                break;

                        /* compare != */
                        CASE(12)
                                c = yyGet();
                                if (c == '=')
                                {
                                        state = 2;
                                        yytext = tokenDup(tokStart,yytoklen);
                                        yylval = (YYSTYPE) yytext;
                                        yyReturn(token(NE));
                                        break;
                                }
                                state = 999;
                                break;

                        /* char literal */
                        CASE(13)
                                c = yyGet();
                                c = yyGet();
                                if (c != '\'')
                                {
                                        state = 999;
                                        break;
                                }
                                state = 2;
                                yytext = tokenDup(tokStart,yytoklen);
                                yylval = (YYSTYPE) yytext;
                                yyReturn(token(CHAR));
                                break;

                        /* State 999 : Unknown token in non-mSQL tag. */
                        CASE(998)
                                yyRevert();
                                c = yySkip();
                                state = 0;
                                if (checkIf())
                                        printf("%c",c);
                                break;

                        /* State 999 : Unknown token in mSQL tag. */
                        CASE(999)
                                yyRevert();
                                c = yySkip();
                                if (ifState)
                                {
                                        if (c == '(')
                                                ifDepth++;
                                        if (c == ')')
                                                ifDepth--;
                                        if (ifDepth == 0)
                                                ifState = 0;
                                }
                                *dummyBuf = c;
                                *(dummyBuf + 1) = 0;
                                yytext = dummyBuf;
                                yylval = (YYSTYPE) yytext;
                                state = 2;
                                yyReturn(token(yytext[0]));
                                break;

                        /* State 1000 : End Of Input */
                        CASE(1000)
                                yyReturn(token(END_OF_INPUT));

                }
        }
}


#ifdef NOTDEF

/*
** Stand-alone test harness, compiled only when NOTDEF is defined.
** NOTE(review): the value returned by yylex() is printed with %s, so this
** presumably expects a LEX_DEBUG build in which tokens are strings -
** confirm before using.
*/

/* Stub used by the harness; always reports 0 */
int checkIf()
{
        return(0);
}


/* Scan stdin and print each token returned by yylex() until it returns 0 */
int main()
{
        char    *tokName;

        initScanner(fileno(stdin));
        for (tokName = (char *) yylex(); tokName; tokName = (char *) yylex())
        {
                printf("%-15.15s of length %u is \"%s\"\n", tokName, yytoklen,
                        yytext?yytext:(u_char *)"(null)");
        }
}

#endif
