//
// This file contains proprietary information of Jesse Buckwalter.
// Copying or reproduction without prior written approval is prohibited.
//
// Copyright (c) 1993, 1994, 1995
// Jesse Buckwalter
// 525 Third Street
// Annapolis, MD 21403
// (410) 263-8652
//

// LEXICAL.CPP

// Implements class LexicalAnalyzer, which performs lexical analysis on a
// statement.

#ifndef  __CONIO_H
#include <conio.h>
#endif

#ifndef  __CYTPE_H
#include <ctype.h>
#endif

#ifndef  __STDIO_H
#include <stdio.h>
#endif

#ifndef  __STDLIB_H
#include <stdlib.h>
#endif

#ifndef  __STRING_H
#include <string.h>
#endif

#ifndef  __LEXICAL_H
#include "lexical.h"
#endif

// Possible error codes.  Each value is passed to LexicalAnalyzer::error(),
// which uses it as an index into errorMessages[].

#define E_INVALIDCHAR   0   // character not found in validChars[]
#define E_SYMTOLONG     1   // symbol already holds MAXSYMLENGTH characters

// This table is the list of all characters recognized by the parser.  If the
// statement to be parsed contains a character NOT in this table, an error is
// issued.  getSymbol() lower-cases each character before searching the table
// with memchr(), so upper-case letters need not be listed.
//
// The last entry is written as the character literal '\xff' rather than the
// integer 0xff: the stored byte is the same, but an int constant of 255 does
// not fit a signed char and is rejected as a narrowing conversion by C++11
// and later compilers.

const char LexicalAnalyzer::validChars[ NUMVALIDCHARS ] =
                         {
                            ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
                            'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
                            't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2',
                            '3', '4', '5', '6', '7', '8', '9', '(', ')', '+',
                            '-', '*', '/', '=', '<', '>', '!', '|', '&', '_',
                            '"', '?', ';', '\0', '\xff'
                         };

// First stage of the keyword lookup.  map[ letter - 'a' ] is the index in
// keywords[] of the LAST keyword that begins with that letter;
// lookupKeyword() starts there and steps toward lower indices.  A value of 0
// means no keyword begins with that letter.

unsigned LexicalAnalyzer::map[ 26 ] =
{
    5,    // a
    7,    // b
   10,    // c
   15,    // d
   18,    // e
   36,    // f
   37,    // g
   39,    // h
   40,    // i
    0,    // j  (no keywords)
   43,    // k
   44,    // l
   54,    // m
   55,    // n
   56,    // o
   58,    // p
    0,    // q  (no keywords)
    0,    // r  (no keywords)
   60,    // s
   62,    // t
   64,    // u
    0,    // v  (no keywords)
   66,    // w
    0,    // x  (no keywords)
    0,    // y  (no keywords)
    0     // z  (no keywords)
};

// Keyword table searched by lookupKeyword().  The position of a keyword in
// this array is the value lookupKeyword() hands back for it, so entries must
// not be moved without updating whatever consumes those values.
//
// Changes to the keywords table require changes to the map: map[] holds, for
// each first letter, the index of the last keyword starting with that letter.
// Entry 0 is an unused placeholder (map[] uses 0 to mean "no keyword").
//
// NOTE(review): the entries are in ascending strcmp() order except for
// "modulus" (51) and "mod" (52).  A downward search that stops as soon as the
// symbol compares greater than an entry will stop at "mod" and never reach
// "modulus".

char* LexicalAnalyzer::keywords[ 67 ] =
                        {
                           "",                          //  0 unused
                           "alias",                     //  1
                           "and",                       //  2
                           "asctime",                   //  3
                           "auto",                      //  4
                           "ax25_uploader",             //  5
                           "bbs_message_type",          //  6
                           "bulletin_id_number",        //  7
                           "compression_description",   //  8
                           "compression_type",          //  9
                           "create_time",               // 10
                           "day",                       // 11
                           "days",                      // 12
                           "debug",                     // 13
                           "destination",               // 14
                           "download_count",            // 15
                           "equationsize",              // 16
                           "expire_time",               // 17
                           "extramask",                 // 18
                           "f1",                        // 19
                           "f10",                       // 20
                           "f11",                       // 21
                           "f12",                       // 22
                           "f2",                        // 23
                           "f3",                        // 24
                           "f4",                        // 25
                           "f5",                        // 26
                           "f6",                        // 27
                           "f7",                        // 28
                           "f8",                        // 29
                           "f9",                        // 30
                           "file_description",          // 31
                           "file_ext",                  // 32
                           "file_name",                 // 33
                           "file_number",               // 34
                           "file_size",                 // 35
                           "file_type",                 // 36
                           "getenvar",                  // 37
                           "hour",                      // 38
                           "hours",                     // 39
                           "input",                     // 40
                           "keypress",                  // 41
                           "keytitle",                  // 42
                           "keywords",                  // 43
                           "last_modified_time",        // 44
                           "maskauto",                  // 45
                           "maskdone",                  // 46
                           "masknever",                 // 47
                           "maskpriority",              // 48
                           "minute",                    // 49
                           "minutes",                   // 50
                           "modulus",                   // 51 (sorts after "mod")
                           "mod",                       // 52 (sorts before "modulus")
                           "myaddr",                    // 53
                           "mycall",                    // 54
                           "never",                     // 55
                           "or",                        // 56
                           "precedence",                // 57
                           "priority",                  // 58
                           "seu_flag",                  // 59
                           "source",                    // 60
                           "title",                     // 61
                           "today",                     // 62
                           "upload_time",               // 63
                           "user_file_name",            // 64
                           "week",                      // 65
                           "weeks"                      // 66
                        };

// Text shown by error(), indexed by the E_* error codes defined above.
char* LexicalAnalyzer::errorMessages[ 2 ] =
{
   "Invalid character in input.",               // E_INVALIDCHAR
   "Symbol too long."                           // E_SYMTOLONG
};


void LexicalAnalyzer::error( int errcode )
// --------------------------------------------------------------------------
// error displays an error message corresponding to errcode.
// --------------------------------------------------------------------------
{
   printf( "!!!! %s\n", errorMessages[ errcode ] );
   while (!kbhit());
   getch();

   ch = ENDOFLINE;                               // force scanning to halt
};

char LexicalAnalyzer::getNextChar()
// --------------------------------------------------------------------------
// Appends the current character (ch) to the symbol string, then reads the
// next character of the statement into ch and advances charOffset.
// Returns the new ch.  If ch is already the null character nothing is read
// and '\0' is returned, so the scanner never runs past the end of the line.
//
// If the symbol already holds MAXSYMLENGTH characters, the symbol is
// printed, E_SYMTOLONG is reported, and the value error() left in ch is
// returned.
// --------------------------------------------------------------------------
{
   if (ch)                                       // if non-null, then append
   {                                             // to symbol string
      // Measure the symbol from pSymbol rather than with strlen().
      // getSymbol() rewinds pSymbol without re-terminating symbol, so
      // strlen() can still see the PREVIOUS symbol's length and raise a
      // false "Symbol too long".  The pointer difference is also O(1).
      if (pSymbol - (char*) &symbol < MAXSYMLENGTH)
      {
         *pSymbol++ = ch;
         *pSymbol = '\0';
         ch = *bufPtr++;
         charOffset++;
      }
      else
      {
         printf( "%s\n", symbol );
         error( E_SYMTOLONG );
      }
   }
   return ch;
}

void LexicalAnalyzer::lookupKeyword( char* sym, int& keywrd )
// --------------------------------------------------------------------------
// Determine if sym exists in the keyword table.  If it does, keywrd is set
// to the keyword's index in keywords[].  If it doesn't, keywrd is set to
// STRCONSTANT, i.e. the symbol is treated as a string constant.
//
// sym is expected to be lower case already; the comparison is an exact
// strcmp().
// --------------------------------------------------------------------------
{
   keywrd = STRCONSTANT;                         // assume "not a keyword"

   if (sym == 0)
      return;

   // Only symbols starting with a lower-case letter can be keywords.  This
   // check also keeps the subscript into map[] within 0..25: getSymbol()
   // passes symbols that start with '_', '*', '?' or '/' as well.
   const char first = sym[ 0 ];
   if (first < 'a' || first > 'z')
      return;

   // map[] gives the index of the last keyword starting with this letter
   // (0 if there is none).  Walk downward over every keyword that shares the
   // first letter.  The whole group is examined instead of stopping at the
   // first entry that sorts below sym, so a group that is not in strict
   // ascending order ("modulus" is listed before "mod") is still searched
   // completely.
   for (unsigned index = map[ first - 'a' ];
        index > 0 && keywords[ index ][ 0 ] == first;
        index--)
   {
      if (strcmp( sym, keywords[ index ] ) == 0)
      {
         keywrd = index;                         // found: return table index
         return;
      }
   }
}

void LexicalAnalyzer::initLexicalyzer( const char* lineToParse )
// --------------------------------------------------------------------------
// Initializes the lexical analyzer to parse the string given as its
// parameter.  The line is copied into the analyzer's own statement buffer,
// the symbol string is cleared, and the first character of the line is
// loaded into ch so that getSymbol() can be called next.
//
// A null lineToParse is treated as an empty line.  A line longer than the
// statement buffer is truncated rather than written past its end.
// --------------------------------------------------------------------------
{
   if (lineToParse == 0)
      lineToParse = "";

   // Bounded copy.  NOTE(review): sizeof relies on statement being a
   // fixed-size char array member, as the (char*) &statement cast below
   // implies -- confirm against lexical.h.
   strncpy( statement, lineToParse, sizeof( statement ) - 1 );
   statement[ sizeof( statement ) - 1 ] = '\0';
   bufPtr = (char*) &statement;

   // Before calling getNextChar() for the first time, need to ensure that
   // ch is non-null: getNextChar() only reads a character when ch is
   // non-null.  The blank used here is appended to symbol by that first
   // call; getSymbol() rewinds pSymbol before it builds a symbol.

   ch = ' ';
   charOffset = -1;                              // becomes 0 once the first
                                                 // character has been read
   pSymbol = (char*) &symbol;                    // initialize pointer to
                                                 // beginning of symbol string
   *pSymbol = 0;
   ch = getNextChar();
}

unsigned LexicalAnalyzer::getSymbol()
// --------------------------------------------------------------------------
// This is the "guts" of the lexical analyzer.  getSymbol() scans the input
// to identify the next valid token in the langauge.  On return 'token' is
// set to a coded representation of the item it found, 'symbol' contains the
// string (such as identifier name or string constant value ), and for
// numerical constant values, 'tokenValue' contains the value.
// --------------------------------------------------------------------------
{
   int symOffset = charOffset;
   token = -1;                                   // no token
   pSymbol = (char*) &symbol;                    // initialize pointer to
   do                                            // beginning of symbol string
   {
      ch = tolower( ch );
      if (memchr( validChars, ch, NUMVALIDCHARS ) == 0)
         error( E_INVALIDCHAR );

      // Note that getSymbol() reads the statement character by character
      // until it hits endofline or identifies a valid character sequence.

      int hex;
      switch (ch)
      {
         case '\0':
         case 0xff:
         case ';':                               // comment
            token = ENDOFLINE;
            break;
         case ' ':
            while (ch == ' ')                    // ignore blanks by skipping
               ch = getNextChar();               // over them
            pSymbol = (char*) &symbol;           // reset symbol to null 
            symOffset = charOffset;
            break;                               // string, no need to save
                                                 // the blanks.

         // if ch is a letter, then obtain a keyword or string constant.

         case 'a': case 'b': case 'c': case 'd': case 'e':
         case 'f': case 'g': case 'h': case 'i': case 'j':
         case 'k': case 'l': case 'm': case 'n': case 'o':
         case 'p': case 'q': case 'r': case 's': case 't':
         case 'u': case 'v': case 'w': case 'x': case 'y':
         case 'z': case '_': case '*': case '?': case '/':
            while (isalpha( ch ) || isdigit( ch ) || ch == '_' ||
                   ch == '*' || ch == '?' || ch == '/')
               ch = tolower( getNextChar() );
            
            // Call lookupKeyword() to see if the symbol is a keyword.  The
            // function sets token either to a coded representation of the
            // keyword, or STRCONSTANT.

            lookupKeyword( symbol, token );
            if (token == STRCONSTANT)            // retain letters' case
               memcpy( symbol, statement + symOffset, strlen( symbol ) );
            break;

         // Read literal strings from the input.

         case '"':
            ch = getNextChar();                  // skip over quote
            pSymbol = (char*) &symbol;           // reset symbol to null
            while (ch != 0 && ch != '"')
               ch = getNextChar();
            if (ch == '"')
            {
               ch = getNextChar();               // skip over quote
               symbol[ strlen( symbol ) - 1 ] = 0;
            }

            token = STRCONSTANT;
            break;

         // Read a numeric value from the input.  If you wanted, you could
         // modify this section to parse out floating point value.

         case '0': case '1': case '2': case '3': case '4':
         case '5': case '6': case '7': case '8': case '9' :
            hex = 0;
            ch = getNextChar();
            if (ch == 'x' || ch == 'X')
            {
               hex = 1;
               ch = getNextChar();
            }
            while (isdigit( ch ))
               ch = getNextChar();
            char* endptr;
            if (hex)
               tokenValue = strtoul( symbol, &endptr, 16 );
            else
               tokenValue = strtoul( symbol, &endptr, 10 );
            token = NUMCONSTANT;
            break;

      // Recognize special punctuation symbols

         case '(':
            ch = getNextChar();
            token = LP;
            break;
         case ')':
            ch = getNextChar();
            token = RP;
            break;
         case '+':
            ch = getNextChar();
            token = ADDOP;
            break;
         case '-':
            ch = getNextChar();
            token = SUBOP;
            break;
         case '!':
            ch = getNextChar();
            if (ch == '=')
            {
               ch = getNextChar();
               token = NEQOP;
            }
            else
               token = NOT;
            break;
         case '=':
            ch = getNextChar();
            if (ch == '=')
            {
               ch = getNextChar();
               token = EQOP;
            }
            else
               token = EQOP;
            break;
         case '<':
            ch = getNextChar();
            if (ch == '=')
            {
               ch = getNextChar();
               token = LEQOP;
            }
            else
               if (ch == '>')
               {
                  ch = getNextChar();
                  token = NEQOP;
               }
               else
                  token = LESOP;
            break;
         case '>':
            ch = getNextChar();
            if (ch == '=')
            {
               ch = getNextChar();
               token = GEQOP;
            }
            else
               token = GTOP;
            break;
         case '|':
            ch = getNextChar();
            if (ch == '|')
               ch = getNextChar();
            token = OR;
            break;
         case '&':
            ch = getNextChar();
            if (ch == '&')
               ch = getNextChar();
            token = AND;
            break;
      };
   } while (ch && token < 0);
   if (debug && token != ENDOFLINE)
      printf( "Symbol = %s, Token = %i\n", symbol, token );
   return token;
};
