/* convert.c
 * This file is split off to contain the kana<->romaji routines.
 */

#include <X11/Xlib.h>
#include <X11/Intrinsic.h>
#include <X11/keysym.h>

#include <stdio.h>

#include "defs.h"
#include "convert.h"
#include "search.h"

/*
 * Parser states used by romajitokana().
 * Note that the romaji is scanned from RIGHT to LEFT, so a state
 * describes what has already been seen to the right of the current
 * character.  The numeric values are spelled out because
 * romajitokana() does arithmetic on them.
 */
enum {
	STATE_NONE  = 0,
	STATE_A     = 1,
	STATE_I     = 2,
	STATE_U     = 3,
	STATE_E     = 4,
	STATE_O     = 5,
	STATE_HI    = 6,	/* for chi or shi */
	STATE_SU    = 7,	/* for tsu */
	STATE_SPACE = 8,	/* for lone 'n' at end */
	STATE_OTHER = 9
};


/*
 * Vowel selector.  romajitokana() computes (consonant*5) + ADD_x - 1
 * to index std_translations[], so the numeric values are load-bearing:
 * DO NOT CHANGE the order.
 *
 * When the same slots are used for the ya/yu/yo row they read as
 *   ADD_NONE, ADD_YA, ADD_FILL1, ADD_YU, ADD_FILL2, ADD_YO
 */
enum {
	ADD_NONE = 0,
	ADD_A    = 1,
	ADD_I    = 2,
	ADD_U    = 3,
	ADD_E    = 4,
	ADD_O    = 5
};


/* Map for converting kana to romaji.
 * Indexed by the second byte (0x00..0x7f) of a two-byte kana code;
 * the entry is the romaji to print for that kana, or "" when the
 * kana prints nothing on its own (small kana, unused slots).
 *
 * Every one of the 128 slots must be initialized explicitly: a
 * missing comma between two adjacent literals silently merges them,
 * shifts every later entry down by one and leaves the final slot
 * NULL.  Each row below holds exactly 8 entries.
 */
static char * kanamap[128]=
{
	/* 0x00 */ "",   "",   "",   "",   "",   "",   "",   "",
	/* 0x08 */ "",   "",   "",   "",   "",   "",   "",   "",

	/* 0x10 */ "",   "",   "",   "",   "",   "",   "",   "",
	/* 0x18 */ "",   "",   "",   "",   "",   "",   "",   "",

	/* 0x20 */ "",   "",   "a",  "",   "i",  "",   "u",  "",
	/* 0x28 */ "e",  "",   "o",  "ka", "ga", "ki", "gi", "ku",

	/* 0x30 */ "gu", "ke", "ge", "ko", "go", "sa", "za", "shi",
	/* 0x38 */ "zhi","su", "zu", "se", "ze", "so", "zo", "ta",

	/* 0x40 */ "da", "chi","ji", "",   "tsu","zu", "te", "de",
	/* 0x48 */ "to", "do", "na", "ni", "nu", "ne", "no", "ha",

	/* 0x50 */ "ba", "pa", "hi", "bi", "pi", "fu", "bu", "pu",
	/* 0x58 */ "he", "be", "pe", "ho", "bo", "po", "ma", "mi",

	/* 0x60 */ "mu", "me", "mo", "",   "ya", "",   "yu", "",
	/* 0x68 */ "yo", "ra", "ri", "ru", "re", "ro", "",   "wa",

	/* 0x70 */ "",   "",   "o",  "n",  "",   "",   "",   "",
	/* 0x78 */ "",   "",   "",   "",   "",   "",   "",   "",

};




/* Append the NUL-terminated string src at dst, which must point at
 * the terminating NUL of the output built so far, and return the new
 * position of that NUL.  A NULL src is treated as an empty string.
 */
static char *append_romaji(char *dst, const char *src)
{
	if(src == NULL)
		return dst;
	while((*dst = *src) != '\0')
	{
		dst++;
		src++;
	}
	return dst;
}

/* kanatoromaji
 * Take a kana string as input and write its romaji "reading" into
 * estring, so that folks who can't read kana can still see it.
 *
 *  kstring  two-byte string, terminated by a char with byte1 == 0.
 *           Chars with byte1 0x24 or 0x25 are looked up in kanamap[]
 *           by byte2.  Chars with byte1 0x21 are punctuation:
 *             0x2121 -> " ", 0x2141 -> "-",
 *             0x214a/0x215a -> "(", 0x214b/0x215b -> ")"
 *           Anything else is skipped with a warning on stdout.
 *  estring  output buffer.  It is ASSUMED to be "long enough"; no
 *           bounds checking is done here.
 *
 * Output runs one kana behind the input: a kana is held in lastchar
 * until the next char shows whether a small ya/yu/yo follows, in which
 * case the pair is merged ("ki" + small "ya" -> "kya").
 */
void kanatoromaji(XChar2b *kstring, char *estring)
{
	char *eptr = estring;	/* always points at the output's NUL */
	XChar2b *kptr = kstring;
	int lastchar=0;	/* byte2 of the kana not yet printed; 0 = none */

	eptr[0]='\0';

	/* Test before the first char too, so that an empty input never
	 * steps past its own terminator.
	 */
	while(kptr->byte1 != 0)
	{
		int val = kptr->byte2;

		/* how boring.. punctuation */
		if(kptr->byte1 == 0x21)
		{
			/* print character before this punc */
			eptr = append_romaji(eptr, kanamap[lastchar]);
			lastchar=0;
			kptr++;

			switch(val)
			{
			   case 0x21:
				eptr = append_romaji(eptr, " ");
				break;
			   case 0x41:
				eptr = append_romaji(eptr, "-");
				break;
			   case 0x4a:
			   case 0x5a:
				eptr = append_romaji(eptr, "(");
				break;
			   case 0x4b:
			   case 0x5b:
				eptr = append_romaji(eptr, ")");
				break;
			   default:
				printf("warning.. kanatoromaji found 0x21,0x%x\n",
				       val);
				break;
			}
			continue;
		}

		if((kptr->byte1 != 0x24) && (kptr->byte1 != 0x25))
		{
			/* zero-pad so the two bytes cannot run together */
			printf("warning... 0x%02x%02x found in kanatoromaji\n",
			       kptr->byte1, kptr->byte2);
			kptr++;
			continue;
		}

		/******************************************************/

		/* kanamap[] has 128 entries, so 128 itself is already
		 * out of range.
		 */
		if(val >= 128)
		{
			/* flush first, so the marker lands in the right place */
			eptr = append_romaji(eptr, kanamap[lastchar]);
			lastchar = 0;
			eptr = append_romaji(eptr, "[bad byte2 found]");
			kptr++;
			continue;
		}

		if((val != 0x63) && (val != 0x65) && (val != 0x67))
		{
			/* Ordinary kana: print the one we were holding,
			 * then hold on to THIS one.
			 */
			eptr = append_romaji(eptr, kanamap[lastchar]);
			lastchar = val;
			kptr++;
			continue;
		}

		/* Must be on small ya,yu,yo.
		 * Replace the held kana by its consonant prefix;
		 * "a", "u", or "o" is appended afterwards.
		 */
		switch(lastchar)
		{
		   case 0x2d:	/*ki*/
			eptr = append_romaji(eptr, "ky");
			break;
		   case 0x2e:	/*gi*/
			eptr = append_romaji(eptr, "gy");
			break;
		   case 0x37:	/*shi*/
			eptr = append_romaji(eptr, "sh");
			break;
		   case 0x38:	/* shi''*/
			eptr = append_romaji(eptr, "j");
			break;
		   case 0x41:	/*chi*/
			eptr = append_romaji(eptr, "ch");
			break;
		   case 0x4b:	/*ni*/
			eptr = append_romaji(eptr, "ny");
			break;
		   case 0x52:	/*hi*/
			eptr = append_romaji(eptr, "hy");
			break;
		   case 0x53:	/*bi*/
			eptr = append_romaji(eptr, "by");
			break;
		   case 0x54:	/*pi*/
			eptr = append_romaji(eptr, "py");
			break;
		   case 0x5f:	/*mi*/
			eptr = append_romaji(eptr, "my");
			break;
		   case 0x6a:	/*ri*/
			eptr = append_romaji(eptr, "ry");
			break;
		   default:
			/* Bad placement of ya/yu/yo.  Oh well.. print the
			 * held kana as-is (do not lose it), then the small
			 * kana, marked as small.
			 */
			eptr = append_romaji(eptr, kanamap[lastchar]);
			eptr = append_romaji(eptr, "_y");
			break;
		}

		switch(val)
		{
		   case 0x63:
			eptr = append_romaji(eptr, "a");
			break;
		   case 0x65:
			eptr = append_romaji(eptr, "u");
			break;
		   case 0x67:
			eptr = append_romaji(eptr, "o");
			break;
		}

		/* kanamap[] is "" for the small ya/yu/yo slots, so holding
		 * this value prints nothing more later on.
		 */
		lastchar = val;
		kptr++;
	}

	/* flush whatever kana is still being held */
	append_romaji(eptr, kanamap[lastchar]);
}



/* romajitokana
 * This is VERY ugly.
 * It's a grammar machine, hardcoded in C.
 * We do a right-first parse, on a XChar2b string.
 * Look at the trailing pseudo-english (romaji) part of the string,
 * and convert it to kana, in place.
 * Only convert ONE logical unit, longest match.
 * (So longest would be something like "kya". One consonant, plus "ya")
 *
 *  kstart  string terminated by a char with byte1 == 0 (assumed!).
 *          Romaji chars are stored with byte1 == 0x23 and the ASCII
 *          code in byte2.  The string is modified in place: the
 *          parsed romaji is overwritten by one or two entries taken
 *          from std_translations[], followed by a terminator.
 *
 * The last char must be one of 'a','i','u','e','o', or one of the
 * special terminators ' ' (for "n "), '-' and '\''.
 *
 * Don't do anything otherwise.
 */

void romajitokana(XChar2b *kstart)
{
	XChar2b *kparse;
	int y_state=0;	/* 1 => two kana are written (i-row + small y) */
	int state = STATE_NONE;
	int addchar = ADD_NONE;
	int consonant=0;
	/* consonant*5  is starting index into stdtranslations[].
	 * Addchar then completes the final index.
	 * Basically, consonant represents "k","s","t", ....
	 */

	int stoploop=0;

	/* start off by looking for pseudo-8-bit chars */
	while(kstart->byte1 != 0x23)
	{
		if(kstart->byte1 == 0)
		{
			/* NO non-kana CHARS! so just return */
			return;
		}
		kstart++;
	}

	/* kstart now has first 8-bit char */
	kparse = kstart;

	/* now find last 8-bit char */
	while(kparse[1].byte1 != 0)
		kparse++;


	if(kparse->byte1 != 0x23)
		return;

	/*  Check for a "normal" end-of-fragment.
	 * Only bother further parsing if we can do something useful.
	 */

	switch(kparse->byte2)
	{
	   case 'a':
	   case 'i':
	   case 'u':
	   case 'e':
	   case 'o':
			/* the above are all "normal" endings */

	   case ' ':	/* special terminator for 'n' */
	   case '\'':	/* special case for small-tsu */
	   case '-':	/* special case for hyphen (to extend vowel) */
		break ;

	   default:
		/* cant do anything without a proper terminating char */
		return;
	}


	/************************************************/
	/* Now do all the icky parsing stuffs.		*/
	/* We look at a 16-bit string from right to left.
	 * Unfortunately, it is actually a mix of true 16-bit chars,
	 * and ascii mapped onto 16-bit chars (byte2 possibly having ascii)
	 */
	do
	{
		/* Oops. Too far back. Go forward again. */
		if(kparse <kstart)
		{
			kparse = kstart;
			break;
		}
		if(kparse->byte1 != 0x23)
		{
			kparse++;
			break;
		}


		/********************************************************/
		/* Okay, do state machine.				*/
		/* Switch on possibility of seeing romaji		*/
		/********************************************************/
		switch(kparse->byte2)
		{
		   /************************************************/
		   /* The following are all the "final char" cases */
		   /************************************************/

		   case 'a':
			addchar=ADD_A;
			state = STATE_A;
			break;
		   case 'i':
			addchar=ADD_I;
			state = STATE_I;
			break;
		   case 'u':
			addchar=ADD_U;
			state = STATE_U;
			break;
		   case 'e':
			addchar=ADD_E;
			state = STATE_E;
			break;
		   case 'o':
			addchar=ADD_O;
			state = STATE_O;
			break;
		   case '\'': /* This is actually invisible.
			      * Thats okay!
			      * Convert to small-tsu
			      */
			consonant=15;
			state = STATE_NONE;
			addchar=3;
			stoploop=1;/* print char NOW */
			break;
		   case '-': /* This is actually invisible.
			      * Thats okay!
			      * Convert to kanakana "--"
			      */
			consonant=15;
			addchar=3;
			state = STATE_NONE;
			stoploop=1;/* print char NOW */
			break;


		/* The nasty ambiguous case:
		 *   'n' can be final char, as lone 'n',
		 *  OR be "na", "nu", etc.
		 *  Either way, stop parse here.
		 */
		   case 'n':
			if(state == STATE_SPACE)
			{
				/* hack for lone 'n' */
				consonant=15; /* this needs 15+1 */
				state = STATE_NONE;
				addchar=2;
			} else
			{
				consonant=4;
			}
			stoploop=1;
			/* Must NOT fall into the 'j' case below: that
			 * would overwrite consonant and turn "na" into
			 * ji + small ya.
			 */
			break;

		   /******************************************/
		   /* Here is the consonant section          */
		   /******************************************/

			/* special-case "ji" instead of "zi" */
		   case 'j':
			/* "ja","ju","jo" are ji + small ya/yu/yo;
			 * plain "ji" is a single kana.
			 */
			y_state=1;
			switch(state)
			{
			   case STATE_I:
				y_state=0;
				break;
			}

			state = STATE_I;
			consonant=9;
			stoploop=1;
			break;


		   case 'k':
			consonant=1;
			stoploop=1;
			break;
		   case 's':
			if(consonant == 5) /* 'h' */
			{
				/* NOTE(review): only "shi" gets past this
				 * switch; "sha"/"shu"/"sho" hit the default
				 * and return unconverted.  Confirm whether
				 * that is intended.
				 */
				y_state=1;
				switch(state)
				{
				   case STATE_I:
					y_state=0;
					break;

				   default:
					return;
				}
				state = STATE_I;
				/* and fall through */
			}

			consonant=2;
			stoploop=1;
			break;

		   case 't':
			consonant=3;
			if(state == STATE_SU)
			{
				/* for "tsu" */
				state = STATE_U;
			}
			stoploop=1;
			break;
			/* special-case "chi" instead of "ti" */
		   case 'c':
			if(consonant != 5)
			{
				/* a 'c' is only understood as part of "ch" */
				stoploop=1;
				break;
			}

			/* consonant is 5 here, i.e. we saw 'h' */
			y_state=1;
			switch(state)
			{
			   case STATE_I:
				y_state=0;
				break;

			   default:
				return;
			}
			state = STATE_I;

			consonant=3;
			stoploop=1;

			break;
		   case 'h':
			/* do not stop: an 's' or 'c' may precede */
			consonant=5;
			/*stoploop=1;*/
			break;
		   case 'm':
			consonant=6;
			stoploop=1;
			break;
		   case 'r':
			consonant=7;
			stoploop=1;
			break;
		   case 'g':
			consonant=8;
			stoploop=1;
			break;

		/********************************************************/
		/* Here's the "wierd" section, that are technically	*/
		/* modifications of previous chars			*/
		/********************************************************/
		   case 'z':
			consonant=9;
			stoploop=1;
			break;
		   case 'd':
			consonant=10;
			stoploop=1;
			break;
		   case 'b':
			consonant=11;
			stoploop=1;
			break;
		   case 'p':
			consonant=12;
			stoploop=1;
			break;

		   case 'w':
			consonant=15;
			stoploop=1;
			break;

		/********************************************************/
		/* Special handling for "ya/yu/yo			*/
		/* It can be "terminal", OR combined with certain	*/
		/* consonants						*/
		/********************************************************/


		   case 'y':
			y_state=1;
			switch(state)
			{
			   case STATE_A:
			   case STATE_U:
			   case STATE_O:
				break;
			   default:
				puts("strange y+ in conversion");
				return;
			}
			/* Y is the "anti-consonant" ;-)
			 * consonant == -1 is a special case.
			 * Remember, there may well be a real consonant
			 * after this; "kya", "jyo", etc
			 */
			consonant=-1;
			break;

		   case ' ':
			state = STATE_SPACE;
			break;

		}

		/* Now go check char to left of one we just looked at */
		kparse--;
	} while(!stoploop);


	/* counter auto-(kparse--) above, just before while() test */
	/* This is to contrast if we used "break" to get out here */
	if(stoploop==1)
	{
		kparse++;
	}


	/* kparse is now pointing to first romaji char we
	 * intend to rewrite as hiragana
	 */

	/* Nothing convertible was parsed (for example a space that does
	 * not follow an 'n').  With addchar still ADD_NONE the index
	 * computed below would be (consonant*5)-1, which is -1 for
	 * consonant 0.  Leave the string untouched instead.
	 */
	if(addchar == ADD_NONE)
		return;


	/****************************************************************/
	/* Keep in mind.. we may have to write one, or TWO chars	*/
	/* Two chars would be neccessary if y_state is set		*/
	/****************************************************************/


	/* Hmm.. if anti-consonant, assume lone ya/yu/yo
	 * (addchar is known to be set at this point)
	 */
	if(consonant == -1)
	{
		/* PLAIN YA/YU/YO */
		*kparse = std_translations[13 * 5 + addchar -1][0];
		kparse[1] = std_translations[0][1];
		return;
	}

	/* We have a consonant, and possibly a ya/yu/yo */

	if(y_state == 1)
	{
		/* TWO chars.. trailing  ya/yu/yo */
		/* first the i-row kana of this consonant */
		*kparse = std_translations[(consonant*5) + 1][0];
		kparse++;

		consonant=14; /* lead to ya/yu/yo*/
	}

	*kparse = std_translations[(consonant*5) + addchar -1][0];


	/* now add null-termination */
	kparse[1] = std_translations[(consonant*5) + state-1][1];
	return;
}


	

	

/************************************************************/
/* Intended upper bound on the number of romaji chars held  */
/* at once: we had BETTER NOT have more than 50 chars!!      */
/* NOTE(review): nothing in the visible part of this file    */
/* uses MAXROMAJI or enforces the limit; its only user here  */
/* is the disabled buffer below -- confirm it is still       */
/* needed.                                                   */
#define MAXROMAJI 50
/*static XChar2b romajidone[MAXROMAJI]={{0,0}};*/

/* Handle_romajikana
 * Event handler; gets called every time a key is pressed in the
 * romajikana window.
 *
 *  w        widget the event arrived on (used to find the Display)
 *  closure  unused
 *  e        the event; anything but KeyPress is ignored
 *  cont     unused
 *
 * The unshifted keysym of the key is turned into one XChar2b, which
 * is handed to process_kinput():
 *   BackSpace/Delete -> 0x222b (erase)
 *   Return           -> paragraphglyph[0] (our "accept" char)
 *   space, '-', '\'' -> byte1 0x23 with that ASCII char in byte2
 *   'a'..'z'         -> byte1 0x23 with the letter in byte2
 * Every other key is ignored.
 */
void Handle_romajikana(Widget w, XtPointer closure, XEvent *e, Boolean *cont)
{
	XKeyEvent *key_event;
	KeySym inbetweenK;
	char *charpressed;
	XChar2b addchar;

	if(e->type != KeyPress)
	{
		if(e->type == KeyRelease)
		{
			puts("key released");
			return;
		}
		printf("Some other strange event found in for romaji: %d\n",
		       e->type);
		return;
	}
	key_event = (XKeyEvent *) e;

	inbetweenK = XKeycodeToKeysym(XtDisplay(w), key_event->keycode,0);
	/* A KeySym is an integer, not a pointer; 0 is the "no symbol"
	 * value.
	 */
	if(inbetweenK == (KeySym)0)
	{
		puts("NULL keysym on kana input???");
		return;
	}

	/* we switch based on what character has just been
	 * pressed on an ASCII-based keyboard
	 */
	switch(inbetweenK)
	{
	   case XK_BackSpace:
	   case XK_Delete:
		addchar.byte1 = 0x22;
		addchar.byte2 = 0x2b;
		process_kinput(addchar);
		return;
	   case XK_Return:
	   /* pass our strange "accept" char*/
		addchar.byte1 = paragraphglyph[0].byte1;
		addchar.byte2 = paragraphglyph[0].byte2;
		process_kinput(addchar);
		return;
	   case XK_space:
		/* add " ", but in kana range */
		/* This is a nasty hack to get "n" right */
		addchar.byte1 = 0x23;
		addchar.byte2 = ' ';
		process_kinput(addchar);
		return;
	   case XK_minus:
		/* add "-", but in kana range
		 * (for kanakana vowel extension )
		 * Note that this "char" is invisible.
		 */
		addchar.byte1 = 0x23;
		addchar.byte2 = '-';	/* 0x2d */
		process_kinput(addchar);
		return;
	   case XK_apostrophe:
		/* hack to get "small tsu" to work.
		 * Note that this "char" is invisible.
		 */
		addchar.byte1 = 0x23;
		addchar.byte2 = '\'';	/* 0x27 */
		process_kinput(addchar);
		return;

	}
	charpressed = XKeysymToString(inbetweenK);
	if(charpressed == NULL)
		return;


#ifdef DEBUG
	printf("got string \"%s\"\n", charpressed);
#endif
	/* Only plain letter keys are wanted.  The keysym NAME must be
	 * exactly one lowercase letter: names such as "semicolon",
	 * "comma" or "period" also start with a lowercase letter and
	 * must not be mistaken for 's', 'c' or 'p'.
	 */
	if((charpressed[0] < 'a') || (charpressed[0] > 'z')
	   || (charpressed[1] != '\0'))
	{
		/* outside range of ascii chars we like */
#ifdef DEBUG
		puts("ignoring.. not in normal ascii range");
#endif
		return;
	}

	addchar.byte1 = 0x23;
	addchar.byte2 = charpressed[0];
	process_kinput(addchar);

}
