/*////////////////////////////////////////////////////////////////////////
Copyright (c) 1998 Electrotechnical Laboratry (ETL), AIST, MITI
Copyright (c) 1998 Yutaka Sato

Permission to use, copy, modify, and distribute this material for any
purpose and without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies, and
that the name of ETL not be used in advertising or publicity pertaining
to this material without the specific, prior written permission of an
authorized representative of ETL.
ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS
MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS
OR IMPLIED WARRANTIES.
/////////////////////////////////////////////////////////////////////////
Content-Type:	program/C; charset=US-ASCII
Program:	TLEX.c (Tiny LEXical analyzer)
Author:		Yutaka Sato <ysato@etl.go.jp>
Description:
History:
	860408	created as a part of scanner generator of COSMOS
	921116	added SMALL type state.
	980414	modified to be independent of COSMOS
	980604	added arbitrary string "**", char "[]" and frex_*()
//////////////////////////////////////////////////////////////////////#*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef NULL
#define NULL 0L
#endif

/*
 * ctos -- format a character code for diagnostic output.
 *
 *	ch	character code to describe
 *	str	caller supplied buffer that receives the text; the callers
 *		in this file pass a 10 byte buffer
 *
 * Codes strictly between 0x20 and 0x7F are written as the quoted
 * character ('c'); every other code is written as two or more hex digits
 * followed by 'H'.  Returns str.
 */
static char *ctos(int ch, char *str)
{
	if( 0x20 < ch && ch < 0x7F )
		sprintf(str,"'%c'",ch);
	else
		sprintf(str,"%02xH",(unsigned)ch);	/* %x expects unsigned */
	return str;
}

#ifdef MAIN
/*
 * Stand-alone test build: the logging hooks are mapped onto printf and
 * the program simply runs the self test defined at the end of this file.
 */
#define syslog_DEBUG	printf
#define syslog_ERROR	printf
int fa_test();	/* defined under "TEST & Usage" below */
int main(void){ fa_test(); return 0; }
#endif

/*######################################################*
 *							*
 *	FINITE AUTOMATON GENERATOR AND INTERPRETER	*
 *		Y.Sato	1986,4/8			*
 *							*
 *######################################################*/

/* Number of distinct input codes a full transition vector covers. */
#define CHARSETSIZE	256

/* Full transition vector: one successor slot per input code. */
typedef struct {
	struct fa_stat *nvec[CHARSETSIZE];
} FaVect;

/* Number of transitions a state can hold before it needs a FaVect. */
#define FA_SMALL	4

/*
 * One state of the automaton.  Transitions are kept either in the small
 * in_ch[]/nstat[] table or, once Nstat is allocated, in the full vector;
 * lookups consult Nstat whenever it is set.
 */
typedef struct fa_stat {
	int		stid;	/* Status Id-Number		*/
	int		class;	/* Class of expression end here	*/
	int		eclass; /* class for strings end here	*/
	struct fa_stat *plink;	/* Link to the previous status	*/
	struct fa_stat *alink;	/* Link to the last allocated status */

	unsigned	upper;	/* Upper bound of input		*/
	unsigned	lower;	/* Lower			*/

	int		nnexts;	/* count of transitions recorded so far */
	unsigned char	in_ch[FA_SMALL];	/* small table: input codes */
	struct fa_stat *nstat[FA_SMALL];	/* small table: successors  */
	FaVect	       *Nstat;	/* for large sets		*/
	struct fa_stat *other;	/* back track to "arbitrary string" */
} FaStat;

/* The state created just before fsp (usable as an lvalue). */
#define PREV(fsp)		( (fsp)->plink )

/*
 * Successor of fsp on input ch, or 0.  Arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define NEXT(fsp,ch) \
	( (fsp)->Nstat ? (fsp)->Nstat->nvec[(ch)] : (fsp)->nnexts ? next((fsp),(ch)) : 0 )

/*
 * next1 -- direct successor of fsp on input ch, without any wildcard
 * fallback.
 *
 * When the state owns a full vector the answer is the vector slot for
 * ch; otherwise the small table is searched in insertion order and the
 * first entry recorded for ch wins.  Returns 0 when there is no
 * transition.
 */
static FaStat*
next1(FaStat *fsp, int ch)
{
	int si;

	if( fsp->Nstat )
		return fsp->Nstat->nvec[ch];

	/* only the slots that were actually filled are meaningful */
	for( si = 0; si < fsp->nnexts && si < FA_SMALL; si++ )
		if( fsp->in_ch[si] == ch )
			return fsp->nstat[si];
	return 0;
}

/* Set by next(): non-zero when the last lookup went through `other'. */
static int fa_backed;

/*
 * next -- successor of fsp on input ch, with fallback through the
 * `other' link.
 *
 * 1. A direct transition (next1) is returned as is, fa_backed = 0.
 * 2. Otherwise, if fsp->other is set, a direct transition out of that
 *    state is tried; when found it is returned with fa_backed = 1.
 * 3. Otherwise fsp->other itself is returned (0 when unset); fa_backed
 *    is 1 unless `other' is fsp itself.
 */
static FaStat*
next(FaStat *fsp, int ch)
{
	FaStat *nfsp,*xfsp;

	fa_backed = 0;
	if( (nfsp = next1(fsp,ch)) != 0 )
		return nfsp;

	if( (nfsp = fsp->other) != 0 ){
		if( (xfsp = next1(nfsp,ch)) != 0 ){
			fa_backed = 1;
			return xfsp;
		}
		if( nfsp != fsp )
			fa_backed = 1;
	}
	return nfsp;
}
static int fa_stvec_alloc(FaStat *fsp);	/* defined further down in this file */

/*
 * SETNEXT -- record the transition fsp --ch--> nfsp.
 *
 * While the state has no full vector and fewer than FA_SMALL entries the
 * transition is appended to the small table.  Otherwise the state is
 * switched to a full vector (the small entries are carried over in
 * insertion order) and the vector slot for ch is overwritten.  A state
 * that already owns a vector always receives the transition in the
 * vector, because lookups ignore the small table once Nstat is set.
 *
 * nnexts is incremented once per call.
 * Returns 0, or -1 when the vector could not be allocated (the
 * transition is then not recorded).
 *
 * NOTE(review): in small mode a second entry for the same ch is appended
 * rather than replacing the first one, and next1() returns the first;
 * this differs from the overwrite done in vector mode -- confirm which
 * behaviour callers expect.
 */
static int
SETNEXT(FaStat *fsp, int ch, FaStat *nfsp)
{
	int si;
	unsigned char in = (unsigned char)ch;

	if( fsp->Nstat == 0 && fsp->nnexts < FA_SMALL ){
		fsp->in_ch[fsp->nnexts] = in;
		fsp->nstat[fsp->nnexts] = nfsp;
		fsp->nnexts++;
		return 0;
	}

	if( fsp->Nstat == 0 ){
		fa_stvec_alloc(fsp);
		if( fsp->Nstat == 0 )
			return -1;	/* out of memory: do not recurse or dereference */
		for( si = 0 ; si < FA_SMALL; si++ )
			fsp->Nstat->nvec[fsp->in_ch[si]] = fsp->nstat[si];
	}
	fsp->Nstat->nvec[in] = nfsp;
	fsp->nnexts++;
	return 0;
}

/* True when no transition has been recorded for the state. */
#define NONEXT(fsp) ((fsp)->nnexts == 0)

/*
 * Bookkeeping for the automaton currently under construction.  All
 * accesses go through CurrentFA, which points at DefaultFA in this file.
 */
typedef struct {
	FaStat *Fa_root;	/* Start Status		*/
	FaStat *Fa_last;	/* Last Created Status	*/
	int	 Fa_stid;	/* Last Created stid	*/
	int	 Fa_metachar;	/* Meta character enable*/
} FA;
static FA DefaultFA;
static FA *CurrentFA = &DefaultFA;
/* Short hands for the fields of the current automaton (lvalues). */
#define fa_root		(CurrentFA->Fa_root)
#define fa_last		(CurrentFA->Fa_last)
#define fa_stid		(CurrentFA->Fa_stid)
#define fa_metachar	(CurrentFA->Fa_metachar)

/*
 * allocFaVect -- obtain a zero filled transition vector.
 * The request is 256 bytes larger than the structure itself, exactly as
 * before (NOTE(review): the reason for the slack is not visible here).
 * Returns whatever calloc() returns.
 */
static FaVect *
allocFaVect(void)
{
	FaVect *vec;

	vec = (FaVect*)calloc(1,sizeof(FaVect)+256);
	return vec;
}
/*
 * allocFaStat -- obtain a zero filled state record.
 * Returns whatever calloc() returns.
 */
static FaStat *
allocFaStat(void)
{
	FaStat *st;

	st = (FaStat*)calloc(1,sizeof(FaStat));
	return st;
}

/*##############################*
 *	CONSTRUCT FUNCTIONS	*
 *##############################*/
/*
 * fa_init -- select or create the root state of the current automaton.
 *
 * With a non-null fsp that state simply becomes the root; the "last
 * created" pointer and the id counter are left untouched.  With a null
 * fsp a fresh root is allocated and both are reset to zero.
 * Returns the root (the result of the allocation in the second case).
 */
FaStat *
fa_init(FaStat *fsp)
{
	if( fsp == 0 ){
		fa_root = allocFaStat();
		fa_last = 0;
		fa_stid = 0;
	}else{
		fa_root = fsp;
	}
	return fa_root;
}

/*
 * fa_free -- release an automaton.
 *
 * Frees every state on fsp's allocation list (alink), their transition
 * vectors, and finally fsp itself.  The current root is cleared only
 * when it is the automaton being freed, and the "last created" pointer
 * is cleared when it pointed at one of the freed states, so neither is
 * left dangling.  A null fsp is ignored.  Always returns 0.
 */
int
fa_free(FaStat *fsp)
{
	FaStat *cur,*nxt;
	int was_root,had_last;

	if( fsp == NULL )
		return 0;

	/* decide before freeing: freed pointers must not be compared */
	was_root = (fa_root == fsp);
	had_last = (fa_last == fsp);

	for( cur = fsp->alink; cur; cur = nxt ){
		nxt = cur->alink;
		if( cur == fa_last )
			had_last = 1;
		syslog_DEBUG("#%d freed %x\n",cur->stid,cur->Nstat);
		if( cur->Nstat ) free(cur->Nstat);
		free(cur);
	}
	syslog_DEBUG("#%d freed %x\n",fsp->stid,fsp->Nstat);
	if( fsp->Nstat ) free(fsp->Nstat);
	free(fsp);

	if( was_root )
		fa_root = NULL;
	if( had_last )
		fa_last = NULL;
	return 0;
}

/*
 * fa_stnew -- create a state that belongs to the automaton rooted at
 * ofsp.
 *
 * The new state is pushed onto ofsp's allocation list (the list fa_free
 * walks), receives the next id number, is linked back to the previously
 * created state -- or to the current root when there is none -- and
 * becomes the "last created" state.  The allocation result is not
 * checked.
 */
FaStat*
fa_stnew(FaStat *ofsp)
{
	FaStat *created = allocFaStat();

	created->alink = ofsp->alink;
	ofsp->alink = created;

	created->stid = ++fa_stid;
	PREV(created) = fa_last ? fa_last : fa_root;
	fa_last = created;
	return(created);
}

/*##############################*
 *	OBSERVATION FUNCTIONS	*
 *##############################*/
/*
 * fa_dump -- print one state and its outgoing transitions through
 * syslog_DEBUG.
 *
 * The header line shows the id, the lower/upper bounds, the transition
 * count, the `other' link and the class when set.  Transitions are then
 * listed from the small table when the state has no full vector, and
 * otherwise from the vector, grouped by target state with runs of
 * consecutive input codes folded into one range.  Always returns 0.
 */
int
fa_dump(FaStat *fsp)
{
	int i,j,k;
	char buf1[10],buf2[10];
	FaVect *vec;

	syslog_DEBUG("#%-2d:[%2x-%2x](%d)",fsp->stid,fsp->lower,fsp->upper,
		fsp->nnexts);
	if( fsp->other )
		syslog_DEBUG(" %3d<=* ",fsp->other->stid);
	if( fsp->class )
		syslog_DEBUG(":%3d",fsp->class);
	syslog_DEBUG("\n");
	if( NONEXT(fsp) )
		return 0;

	/*
	 * The storage mode is decided by the presence of the vector, not by
	 * the count: a state holding exactly FA_SMALL transitions still
	 * lives in the small table.
	 */
	vec = fsp->Nstat;
	if( vec == 0 ){
		for( j = 0; j < fsp->nnexts && j < FA_SMALL; j++ ){
			syslog_DEBUG("    -------    #%-2d <- ",fsp->nstat[j]->stid);
			syslog_DEBUG("[%s]\n",ctos(fsp->in_ch[j],buf1));
		}
		return 0;
	}

	for( i = 0; i <= fa_stid; i++ )
	for( j = fsp->lower; j < fsp->upper; j++ ){
		if( vec->nvec[j] == 0 || vec->nvec[j]->stid != i )
			continue;
		syslog_DEBUG("    -------    #%-2d <- ",i);

		/* extend k over the run of codes leading to the same state */
		for( k = j; ((k+1) < fsp->upper) && vec->nvec[k+1]; k++ )
			if( vec->nvec[k+1]->stid != i )
				break;

		if( j < k )
			syslog_DEBUG("[%s-%s]\n",ctos(j,buf1),ctos(k,buf2));
		else	syslog_DEBUG("%s\n",ctos(j,buf1));
		j = k;
	}
	return 0;
}
/*
 * fa_list -- dump fsp and every state created before it, oldest first.
 * The chain of "previous" links is followed recursively so that the
 * output comes out in creation order.  Always returns 0.
 */
int
fa_list(FaStat *fsp)
{
	if( fsp == 0 )
		return 0;
	fa_list(PREV(fsp));
	fa_dump(fsp);
	return 0;
}
/* fa_ls -- dump every state reachable from the last created one; returns 0. */
int fa_ls(void){ fa_list(fa_last); return 0; }

/*##############################*
 *	FA SCANNER		*
 *##############################*/
/*
	 if(DBG_FA & 1){
		char cbuf[10],sbuf[100],pbuf[100],i;

		for(i = 0; i < 20; i++) sbuf[i] = s[i];
		sbuf[i] = sbuf[40] = 0;
		suparse(sbuf,pbuf);
		printf("%3d <-[%2s]- %3d  | %s\n",
		(0 < NEXT(fsp,*s)) ? NEXT(fsp,*s)->stid : -1,
			ctos(*s,cbuf),
			fsp->stid,
			pbuf
		);
	}
*/

/*
 * Snapshot taken by fa_scanX each time it stands on a state whose class
 * is set, so that the scan can fall back to it later.
 */
typedef struct {
	FaStat		*m_fsp;		/* the state that carried the class */
	unsigned char	*m_top;		/* match start pointer at that moment */
	unsigned char	*m_next;	/* input position at that moment */
} Match;

/*
 * fa_scanX -- run the automaton over a NUL terminated string.
 *
 *	fsp	 start state; NULL selects the current root
 *	input	 text to scan
 *	start	 when non-NULL, receives the position recorded as the
 *		 beginning of the match (may be NULL)
 *	tail	 when non-NULL, receives the position where scanning ended
 *	shortest non-zero stops at the first state that has a class
 *
 * Returns the eclass of the final state when the whole input was
 * consumed and eclass is set, otherwise the class of the final state
 * (after falling back to the last remembered classed state, if any).
 * When no start state is available 0 is returned, *start is NULL and
 * *tail is input.
 *
 * This definition deliberately stays in the old (non-prototype) form:
 * callers in this file hand in plain `char' pointers.
 */
static int
fa_scanX(fsp,input,start,tail,shortest)
	register FaStat *fsp;
	unsigned char *input,**start,**tail;
	int shortest;
{	register unsigned char *ss,cc;
	register FaStat *nfsp;
	FaStat *asp;
	unsigned char *lsp; /* the first non-wildcard match point */
	Match lastmatch;

	if( fsp == NULL )
		fsp = fa_root;
	if( fsp == NULL ){
		/* no automaton at all: nothing can match */
		if( start )
			*start = 0;
		if( tail )
			*tail = input;
		return 0;
	}

	/* asp = the entry state for preamble wildcard if exists */
	if( fsp->other == fsp )
		asp = fsp->other;
	else	asp = 0;

	/* always defined, even when the caller does not ask for `start' */
	lsp = 0;
	if( start ){
		if( asp == 0 || fsp->class || fsp->eclass )
			lsp = input;
	}

	lastmatch.m_fsp = 0;
	lastmatch.m_top = 0;
	lastmatch.m_next = 0;
	for( ss = input; (cc = *ss) != 0; ss++ ){
		if( fsp->class ){
			if( shortest )
				break;
			/* remember the most recent classed state */
			lastmatch.m_fsp = fsp;
			lastmatch.m_top = lsp;
			lastmatch.m_next = ss;
		}

		nfsp = next(fsp,cc);
		if( nfsp == NULL || fa_backed ){
			if( lastmatch.m_fsp )
				break;
		}
		if( fa_backed )
			lsp = 0;

#ifdef DEBUG
printf("#### %X %X[%d] %8X %X %s\n",fsp,nfsp,fa_backed,lsp,ss,ss);
#endif

		if( nfsp == NULL ){
			/* dead end on a state reached through a wildcard:
			 * swallow the rest of the input */
			if( NONEXT(fsp) && tail && fsp->other )
				while(*ss) ss++;
			break;
		}
		fsp = nfsp;
		if( start && asp != 0 ){
			if( lsp == 0 ){
				if( fsp != asp ) lsp = ss;
			}else{
				if( fsp == asp ) lsp = 0;
			}

		}
	}
	if( fsp->class == 0 && !(*ss == 0 && fsp->eclass) ){
		if( lastmatch.m_fsp ){
			fsp = lastmatch.m_fsp;
			lsp = lastmatch.m_top;
			ss = lastmatch.m_next;
		}
	}

	if( start ){
		if( lsp == 0 && *ss == 0 && fsp->eclass )
			lsp = ss;
		*start = lsp;
	}
	if( tail )
		*tail = ss;

	if( *ss == 0 && fsp->eclass )
		return fsp->eclass;

	return fsp->class;
}
/*
 * fa_scan -- scan input from state fsp (NULL = current root) and return
 * the class found; *tail receives the position where scanning stopped.
 * Equivalent to fa_scanX without a start pointer and without the
 * "shortest" option.
 *
 * Kept in the old (non-prototype) form because callers in this file
 * pass plain `char' pointers.
 */
int
fa_scan(fsp,input,tail)
	register FaStat *fsp;
	unsigned char *input,**tail;
{
	/* fa_scanX has no prototype: pass a correctly typed null pointer */
	return fa_scanX(fsp,input,(unsigned char**)0,tail,0);
}


/*
 * Push-back buffer consulted first by fa_iscan().
 *
 * Until fa_setup_buff() installs a caller owned buffer, ibsym points at
 * a private, writable, empty one-byte buffer (not a string literal), so
 * clearing it is harmless.
 */
static char ibsym_none[1] = "";
static char *ibsym = ibsym_none;

/* Install rembuf as the push-back buffer (NULL removes it); returns 0. */
int fa_setup_buff(char *rembuf){ ibsym = rembuf ? rembuf : ibsym_none; return 0; }

/*
 * Append str to the push-back buffer; the caller's buffer must be large
 * enough.  Nothing is stored while no buffer is installed.  Returns 0.
 */
int fa_unget_buff(char *str)
{
	if( ibsym == ibsym_none || str == 0 )
		return 0;
	strcat(ibsym,str);
	return 0;
}

/* Empty the push-back buffer; returns 0. */
int fa_clear_buff(void){ *ibsym = 0; return 0; }
/*
 * fa_iscan -- interactive scanner: read one symbol, character by
 * character, and classify it with the current automaton.
 *
 *	sym	receives the characters consumed for the symbol
 *	rem	carry-over text left by the previous call; consumed before
 *		new input is read, and given the character that stopped
 *		the scan
 *
 * Characters come from the push-back buffer first, then from rem, then
 * from standard input.  Returns the class of the symbol; when it is 0
 * the contents of rem are appended to sym and rem is emptied, otherwise
 * sym is NUL terminated.  With no root state, sym is emptied and 0 is
 * returned.  At end of input a NUL character is scanned.
 */
int
fa_iscan(char *sym, char *rem)
{
	FaStat *fsp;
	int class;
	unsigned char ic;
	char in;

	class = 0;
	fsp = fa_root;
	if( fsp == 0 ){
		*sym = 0;
		return 0;
	}

	for(;;){
		/* source and destination overlap: memmove, not strcpy */
		if( (ic = *ibsym) != 0 )
			memmove(ibsym,ibsym+1,strlen(ibsym+1)+1);
		else if( (ic = *rem) != 0 )
			memmove(rem,rem+1,strlen(rem+1)+1);
		else if( scanf("%c",&in) == 1 )
			ic = (unsigned char)in;
		else	ic = 0;	/* end of input or read error */

		if(NONEXT(fsp)){
			class = fsp->class;
			*sym++ = ic;
			break;
		}
		fsp = NEXT(fsp,ic);
		if(fsp == 0){
			*rem = ic;
			break;
		}

		if(fsp->class)
			class = fsp->class;

		*sym++ = ic;
		if(NONEXT(fsp))
			break;
	}

	if(class == 0){ strcpy(sym,rem); *rem = 0; }
	else		*sym = 0;
	return(class);
}
/*
fais(){
	char sym[100],rem[10];
	int class;

	rem[0] = rem[1] = 0;
	do{
		class = fa_iscan(sym,rem);
		syslog_DEBUG("%d sym[%s] rem[%s]\n",class,sym,rem);
	}while(*sym != '.');
}
*/

/*##############################*
 *   FA GENERATOR/INTERPRETER	*
 *##############################*/
/*
 * fa_stcopy -- give fsto the same outgoing transitions as fsfrom.
 *
 * Copies the input bounds, the transition count and the small
 * transition table; when fsfrom owns a full vector, fsto gets a copy of
 * it (allocated on demand).  Copying the count together with the tables
 * keeps later insertions on fsto in the storage that lookups read.
 * class, eclass and the `other' link are not copied.
 *
 * If the vector cannot be allocated, fsto keeps only the small table
 * copy.  Always returns 0.
 */
int
fa_stcopy(FaStat *fsfrom, FaStat *fsto)
{
	int si;

	fsto->lower = fsfrom->lower;
	fsto->upper = fsfrom->upper;

	fsto->nnexts = fsfrom->nnexts;
	for( si = 0; si < FA_SMALL; si++ ){
		fsto->in_ch[si] = fsfrom->in_ch[si];
		fsto->nstat[si] = fsfrom->nstat[si];
	}

	if( fsfrom->Nstat ){
		if( fsto->Nstat == 0 )
			fsto->Nstat = allocFaVect();
		if( fsto->Nstat )
			*(fsto->Nstat) = *(fsfrom->Nstat);
		else if( FA_SMALL < fsto->nnexts )
			fsto->nnexts = FA_SMALL; /* only the small table is usable */
	}
	return 0;
}
/*
 * fa_stvec_alloc -- attach a fresh, empty full transition vector to fsp
 * and set its input bounds to the whole character set.
 * Returns 0 on success, -1 when the vector could not be allocated
 * (fsp->Nstat is then null).
 */
static int
fa_stvec_alloc(FaStat *fsp)
{
	fsp->lower = 0;
	fsp->upper = CHARSETSIZE;
	fsp->Nstat = allocFaVect();
	return fsp->Nstat ? 0 : -1;
}

/*
 * fa_stins -- make sure state fsp has a transition on input `in'.
 *
 *	ofsp	root of the automaton; new states are allocated under it
 *	fsp	state to extend
 *	nfsp	wanted target, or null to let the function pick one
 *	in	input character
 *
 * No successor yet: link to nfsp, or to a freshly created state when
 * nfsp is null.  Successor exists and nfsp is given: nothing is changed,
 * but a differing target is reported as non-deterministic.  Successor
 * exists, nfsp is null and the successor is fsp itself: a new state is
 * created, linked, and given a copy of fsp's transitions.
 * Returns the successor of fsp on `in' as seen after the update.
 */
FaStat*
fa_stins(ofsp,fsp,nfsp,in)
	FaStat *ofsp,*fsp,*nfsp;
	unsigned char in;
{	FaStat *cur,*fresh;

	cur = NEXT(fsp,in);

	if( cur == 0 ){
		if( nfsp != 0 ){
			SETNEXT(fsp,in, nfsp);
		}else{
			fresh = fa_stnew(ofsp);
			SETNEXT(fsp,in, fresh);
		}
		return NEXT(fsp,in);
	}

	if( nfsp != 0 ){
		if( cur != nfsp ){
			syslog_DEBUG("FA_stins: NON-DETERMINISTIC [%d]%c->?\n",
				fsp->stid,in);
		}
	}else if( cur == fsp ){
		fresh = fa_stnew(ofsp);
		SETNEXT(fsp,in, fresh);
		fa_stcopy(fsp,fresh);
	}
	return NEXT(fsp,in);
}

/*##############################*
 *	FA GENERATOR		*
 *##############################*/
int fa_rexp();	/* defined below; old-style declaration on purpose */

/*
 * fa_gen -- add the expression regexp, tagged with class, to the current
 * automaton.  Returns whatever fa_rexp() returns.
 */
int
fa_gen(char *regexp, int class)
{
	return fa_rexp(fa_root,(unsigned char*)regexp,class);
}

/*
 * fa_macro_exp -- hook for expanding a "$<name>" macro reference at the
 * head of regexp.
 *
 *	regexp	 text starting at a '$'
 *	regexpb	 buffer intended to receive an expansion
 *	inmeta	 current meta character mode
 *
 * The expansion itself is disabled (see the commented-out code), so the
 * function only isolates the macro name -- truncated to fit the local
 * buffer -- and always returns regexp unchanged.
 *
 * Kept in the old (non-prototype) form because the caller passes a plain
 * `char' buffer for regexpb.
 */
static unsigned char *
fa_macro_exp(regexp,regexpb,inmeta)
	unsigned char *regexp,*regexpb;
	int inmeta;
{	unsigned char *sp,*tp,id[1000];
	int idlen;

	if( (regexp[0] == '$') && (regexp[1] == '<') ){
		sp = regexp + 2;
		tp = (unsigned char*)strchr((char*)sp,'>');
		if( tp != 0 ){
			/* never copy more than id[] can hold */
			idlen = (int)(tp - sp);
			if( idlen >= (int)sizeof(id) )
				idlen = (int)sizeof(id) - 1;
			memcpy(id,sp,idlen);
			id[idlen] = 0;
/*
			(needs: unsigned char pat[1000],temp[1000],*rstr;)
			rstr = (unsigned char*)tcap_s(id);
			if( *rstr ){
				sprintf(pat,"$<%s>",id);
				strsubs(regexp,temp,pat,rstr);
				if( inmeta )
					sprintf(regexpb,"$%s$",temp);
				else	strcpy(regexpb,temp);
				return regexpb;
			}
*/
		}
	}
	return regexp;
}

/*
 * expset -- expand the body of a "[...]" set into a list of characters.
 *
 *	lrep	first character after '['
 *	rrep	the closing ']' (or the end of the pattern)
 *	set	receives the members in ascending code order, followed by
 *		a NUL
 *
 * A leading '^' negates the set.  "a-z" denotes a range when both ends
 * lie inside the set; a '-' that is the last character of the set is an
 * ordinary member.  Ranges whose end precedes their start are ignored.
 * Only codes below 0x80 are emitted (code 0 included when selected).
 * Returns the number of members stored.
 *
 * Kept in the old (non-prototype) form because the caller passes a plain
 * `char' array for set.
 */
static int
expset(lrep,rrep,set)
	unsigned char *lrep,*rrep,*set;
{	int negate,setx;
	char bset[256];
	int ch1,ch2,ch;

	negate = (*lrep == '^');
	if( negate )
		lrep++;

	for( ch = 0; ch < 256; ch++ )
		bset[ch] = negate ? 1 : 0;

	for(; lrep < rrep; lrep++ ){
		ch1 = lrep[0];
		ch2 = ch1;
		/* a range needs its end point before the terminator */
		if( lrep + 2 < rrep && lrep[1] == '-' ){
			ch2 = lrep[2];
			lrep += 2;
			if( ch2 < ch1 )
				continue;
		}
		for( ch = ch1; ch <= ch2; ch++ )
			bset[ch] = negate ? 0 : 1;
	}

	setx = 0;
	for( ch = 0; ch < 0x80; ch++ ){
		if( bset[ch] )
			set[setx++] = ch;
	}
	set[setx] = 0;
	return setx;
}

/* When non-zero, a '*' that follows a "[...]" set is not a repetition. */
static int fa_nosetrep;

/*
 * fa_rexp -- compile one expression into the automaton rooted at ofsp.
 *
 *	ofsp	root state; compilation starts here
 *	regexp	expression text
 *	class	value stored in the state where the expression ends
 *
 * Syntax handled here:
 *   '$' as last character	the expression must end the input: class
 *				is stored as eclass and compilation stops
 *   "$$", or '$' at the end	a literal '$'
 *   '$' elsewhere		toggles meta character handling
 *   (meta characters off)	every character is literal
 *   "**"			arbitrary string (state loops via `other')
 *   "[]"			any single character
 *   "[set]", "[set]*"		one / repeated member of the set
 *   "c*"			repeated character (not before "**")
 *
 * A '[' without a closing ']' takes the rest of the text as the set.
 *
 * Returns the final state converted to int.
 * NOTE(review): that conversion loses bits where pointers are wider than
 * int; the return type is kept for compatibility.
 *
 * Kept in the old (non-prototype) form because callers in this file pass
 * plain `char' strings.
 */
int
fa_rexp(ofsp,regexp,class)
	FaStat *ofsp;
	unsigned char *regexp;
	int class;
{	unsigned char *rep,*lrep;
	char set[512];
	int setx,si;	/* a set can have up to 128 members: too many for char */
	FaStat *fsp,*fsps;
	int metachar_on = fa_metachar;
	char regexpb[2000];
	FaStat *lastany;

	lastany = 0;
	fsp = ofsp;
	for(rep = regexp; *rep; rep++){

#ifdef DEBUG
printf("#### %X %X %s\n",ofsp,fsp,rep);
#endif

	  if(*rep == '$' && rep[1] == 0 ){
		fsp->eclass = class;
		return (int)(long)fsp;
	  }
	  if(*rep == '$')
		rep = fa_macro_exp(rep,(unsigned char*)regexpb,metachar_on);

	  if(*rep == '$'){
		if((rep[1] == '$')||(rep[1] == 0))
			fsp = fa_stins(ofsp,fsp,0,*rep);
		else	metachar_on = !metachar_on;

	  }else if( !metachar_on )
		fsp = fa_stins(ofsp,fsp, 0 ,*rep);

	  else switch(*rep){
		case '*':
			if( rep[1] == '*' ){
				fsp->other = fsp;
				lastany = fsp;
				rep++;
			}
			break;

		case '[':
			lrep = rep + 1;

			if( lrep[0] == ']' ){
				/* "[]": any single character */
				rep = lrep;
				fsps = fa_stnew(ofsp);
				fsp->other = fsps;
				fsp = fsps;
				break;
			}

			/* find the closing ']' without stepping over the NUL */
			for(rep = lrep; *rep; rep++)
				if(*rep == ']')
					break;

			setx = expset(lrep,rep,(unsigned char*)set);

			if(*rep == ']' && rep[1] == '*' && !fa_nosetrep)
				fsps = fa_stins(ofsp,fsp,fsp,set[0]);
			else	fsps = fa_stins(ofsp,fsp,  0,set[0]);

			for( si = 1; si < setx; si++ )
				fsps = fa_stins(ofsp,fsp,fsps,set[si]);

			fsp = fsps;
			if( *rep == 0 )
				rep--;	/* unterminated set: stop at the NUL */
			break;

		case '.':
		default:
			if(rep[1] == '*'&& rep[2] != '*' )
				fsp = fa_stins(ofsp,fsp,fsp,*rep);
			else	fsp = fa_stins(ofsp,fsp, 0 ,*rep);
			fsp->other = lastany;
	    }
	}
	fsp->class = class;
/*
fa_ls();
*/
	return (int)(long)fsp;
}

/*
 * frex_append -- add a file-name style pattern to an automaton.
 *
 *	fsp	automaton to extend, or NULL to create a new one
 *	rexp	pattern: '*' stands for an arbitrary string, '?' for any
 *		single character
 *
 * The pattern is rewritten into the internal syntax ('*' -> "**",
 * '?' -> "[]") and compiled with class 'F', with meta characters on and
 * set repetition off.  Both settings are put back to the values they had
 * on entry.  Returns the automaton; when the work buffer cannot be
 * allocated, fsp is returned as passed in.
 */
FaStat *frex_append(FaStat *fsp, char *rexp)
{	char ch,*sp,*rp,*rexpb;
	int saved_metachar,saved_nosetrep;

	/* every input character expands to at most two */
	rexpb = (char*)calloc(2*strlen(rexp)+1,1);
	if( rexpb == NULL )
		return fsp;

	rp = rexpb;
	for( sp = rexp; (ch = *sp) != 0; sp++ ){
		switch( ch ){
			case '*': *rp++ = '*'; *rp++ = '*'; break;
			case '?': *rp++ = '['; *rp++ = ']'; break;
			default:  *rp++ = ch;  break;
		}
	}
	*rp = 0;

	if( fsp == NULL )
		fsp = fa_init(NULL);

	if( fsp != NULL ){
		saved_metachar = fa_metachar;
		saved_nosetrep = fa_nosetrep;
		fa_metachar = 1;
		fa_nosetrep = 1;
		fa_rexp(fsp,(unsigned char*)rexpb,'F');
		fa_nosetrep = saved_nosetrep;
		fa_metachar = saved_metachar;
	}

	syslog_DEBUG("frex_append(%s) = %X\n",rexpb,fsp);
	free(rexpb);
	return fsp;
}
/*
 * frex_create -- build a new automaton from a single pattern.
 * Same as frex_append() called with a null automaton.
 */
FaStat *frex_create(char *rexp)
{
	FaStat *none = (FaStat*)0;

	return frex_append(none,rexp);
}
/*
 * frex_matchX -- match str against an automaton built by frex_append().
 *
 *	start	passed through to the scanner (may be NULL)
 *
 * Returns the position where the scan ended when the scanner reports
 * class 'F', and NULL otherwise.
 */
char *frex_matchX(FaStat *fsp, char *str, char **start)
{
	char *tail;

	if( fa_scanX(fsp,(unsigned char*)str,(unsigned char**)start,
	    (unsigned char**)&tail,0) != 'F' )
		return NULL;
	return tail;
}
/*
 * frex_match -- frex_matchX() for callers that do not need the start
 * position of the match.
 */
char *frex_match(FaStat *fsp, char *str)
{
	char **nostart = NULL;

	return frex_matchX(fsp,str,nostart);
}
/* frex_free -- release an automaton obtained from frex_create(); returns 0. */
int frex_free(FaStat *fsp)
{
	fa_free(fsp);
	return 0;
}

/*######################*
 *	TEST & Usage	*
 *######################*/
#ifdef MAIN
/*
 * fa_test -- self test and usage example (built only with MAIN).
 *
 * 1. compiles two file-name patterns and matches sample names,
 * 2. builds a small number/identifier lexer and scans sample symbols,
 * 3. builds a romaji syllable splitter and applies it to lines read from
 *    standard input until an empty line or end of input.
 * Waits for a key press (getchar) after steps of part 1.  Returns 0.
 */
int
fa_test(void){
	int i,len;
	int class;
	char *tail,*cur;
	char line[1024],word[32];
	FaStat *fsp;

	static struct {
		char	*rexp;
		char	class;
	} rexps[] = {
		{ "[_a-zA-Z][_a-zA-Z0-9]*",	'I' },
		{ "[1-9][0-9]*.[0-9]*",		'R' },
		{ "0.[0-9]*",			'R' },
		{ ".[0-9]*",			'R' },
		{ "0",				'D' },
		{ "[1-9][0-9]*",		'D' },
		{ "0[0-7][0-7]*",		'O' },
		{ "0[xX][0-9a-fA-F]*",		'X' },
		{ "-abcd",			'D' },
		{ "-abCD",			'D' },
		{ 0,				0 }
	};
	static char *syms[] = {
		"___1___",
		"_ab12XY",
		"123456.",
		"123.456",
		"0.12345",
		".123456",
		"0",
		"1234567",
		"0123456",
		"0x",
		"0x12abc",
		"0X12abc",
		"0X12abc+postfix",
		0
	};
	static struct {
		char	*rexp;
		char	class;
	} romkan[] = {
		{ " ",				'1' },
		{ "[aiueo]",			'1' },
		{ "[kstnhmyrwgzdbv][aiueo]",	'2' },
		{ "[ksctnhmrgzdb][hy][aiueo]",	'3' },
		{ "j[aiueo]",			'2' },
		{ "n[n']",			'2' },
		{ "chi",			'3' },
		{ "tsu",			'3' },
		{ "xtu",			'3' },
		{ "xtsu",			'3' },
		{ "x[aiueo]",			'2' },
		{ "xy[aiueo]",			'3' },
		{ 0,				0 }
	};
	static char *fsyms[] = {
		"abc123def456ghi",
		"xxabcxxdefxxghixx",
		"xxabcxxdefxxghi",
		"xxabcxxdefghi",
		"xxabcdefghi",
		"axxbcdefghi",
		"abxxcdefghi",
		"abcxxdefghi",
		"abcdxxefghi",
		"abcdexxfghi",
		"abcdefxxghi",
		"abcdefgxxhi",
		"abcdefghxxi",
		"abcdefghixx",
		"abcdefghi",
		"abcdefgh",
		0
	};

	fa_metachar = 1;

	/* pointers are printed with %p: %x expects an unsigned int */
	fsp = frex_create("w*o");
	fa_ls();
	tail = frex_match(fsp,"windows.o");
	syslog_DEBUG("%s %p [%s]\n","windows.o",(void*)tail,tail?tail:"");
	tail = frex_match(fsp,"winserv.o");
	syslog_DEBUG("%s %p [%s]\n","winserv.o",(void*)tail,tail?tail:"");
	tail = frex_match(fsp,"windows.c");
	syslog_DEBUG("%s %p\n","windows.c",(void*)tail);
	frex_free(fsp);
	getchar();

	fsp = frex_create("*abc*def*ghi*");
	fa_ls();
	for( i = 0; fsyms[i]; i++ ){
		tail = frex_match(fsp,fsyms[i]);
		syslog_DEBUG("(%2d/%2d): %s [%x]\n",
			(int)(tail?tail-fsyms[i]:0),(int)strlen(fsyms[i]),
			fsyms[i],(unsigned)(tail?*tail:-1));
	}
	frex_free(fsp);
	getchar();

	fsp = fa_init(NULL);
	syslog_DEBUG("#### GENERATE ####\n");
	for( i = 0; rexps[i].rexp; i++ ){
		syslog_DEBUG("%c : %-30s\n",rexps[i].class,rexps[i].rexp);
		fa_rexp(fsp,(unsigned char*)rexps[i].rexp,rexps[i].class);
	}

	syslog_DEBUG("#### STATUS ######\n");
	fa_ls();

	syslog_DEBUG("#### SCAN ########\n");
	for( i = 0; syms[i]; i++ ){
		class = fa_scan(fsp,(unsigned char*)syms[i],(unsigned char**)&tail);
		syslog_DEBUG("%c (%2d): %s\n",class,(int)(tail-syms[i]),syms[i]);
	}
	fa_free(fsp);

	fsp = fa_init(NULL);
	for( i = 0; romkan[i].rexp; i++ )
		fa_rexp(fsp,(unsigned char*)romkan[i].rexp,romkan[i].class);

	for(;;){
		printf("ROMKAN>\n");
		/* bounded read; fgets keeps the newline, so strip it */
		if( fgets(line,sizeof(line),stdin) == NULL )
			break;
		line[strcspn(line,"\n")] = 0;
		if( line[0] == 0 )
			break;
		for( cur = line;
		     0 < (class = fa_scan(fsp,(unsigned char*)cur,(unsigned char**)&tail));
		     cur = tail ){
			len = (int)(tail - cur);
			if( len <= 0 )
				break;	/* empty match: would never advance */
			if( len >= (int)sizeof(word) )
				len = (int)sizeof(word) - 1;
			memcpy(word,cur,len);
			word[len] = 0;
			printf("%c (%d) [%-3s] %s\n",class,(int)(tail-cur),word,cur);
		}
	}
	fa_free(fsp);
	fa_metachar = 0;
	return 0;
}
#endif
