/*////////////////////////////////////////////////////////////////////////
Copyright (c) 1994-1999 Yutaka Sato
Copyright (c) 1994-1999 Electrotechnical Laboratry (ETL), AIST, MITI

Permission to use, copy, and distribute this material for any purpose
and without fee is hereby granted, provided that the above copyright
notice and this permission notice appear in all copies.
ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS
MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS
OR IMPLIED WARRANTIES.
/////////////////////////////////////////////////////////////////////////
Content-Type:	program/C; charset=US-ASCII
Program:	textconv.c (text / character code conversion)
Author:		Yutaka Sato <ysato@etl.go.jp>
Description:

    codeconv_spec = outcode [ "." incode ]

    outcode       = codename [ "_" extension ]

    incode        = codename [ "_" extension ]

    codename      = [0-9A-Za-z]+

    extension     = [0-9A-Za-z]+

    Example:

    	jis		== jis.jp : japanese input to  JIS (ISO-2022-JP)

History:
	941001 extracted from conf.c and gopher.c
	960105 introduced codeconv_spec and cascaded conversions

//////////////////////////////////////////////////////////////////////#*/
#include "delegate.h"

/* When non-zero, CTX_line_codeconv() copies its input to the output
 * without conversion.  Changed through CTX_codeconv_set(). */
int suppress_codeconv;
/* Defined in another file; reported by plain2html() and assigned by
 * CTX_codeconv_set(). */
extern int PLAIN_TO_HTML_PRE;

/*
 * One-character identifiers of the encodings handled in this file.
 * They are stored in Conv.cv_occe / Conv.cv_icce and are also accepted
 * as the first character of the `ccode' argument of CTX_do_codeconv().
 */
#define CC_THRU	'='	/* Through, without conversion (output only) */
#define CC_ANY	'*'	/* any encoding (input only) */
#define CC_HTML	'H'	/* HTML character encoding */
#define CC_EUC	'E'	/* EUC (EUC-JP) */
#define CC_euc	'e'	/* EUC-JP */
#define CC_JIS	'J'	/* JIS (ISO-2022-JP) */
#define CC_SJIS	'S'	/* SJIS (Shift-JIS, MS-Kanji) */
#define CC_sjis	's'	/* Shift_JIS */
#define CC_RECV	'R'	/* Recover lost ESCs */
#define CC_FIX	'F'	/* Fix lost ESC $ B */
#define CC_UTF8	'U'	/* utf-8 */

/*
 * codename -- map a CC_* identifier to the charset name reported for it.
 *
 *   code    one of the CC_* identifiers above
 *
 * Returns a pointer to a constant string, or NULL for identifiers that
 * have no charset name here (CC_THRU, CC_ANY, CC_HTML, CC_RECV, CC_FIX
 * and any unknown value).  The upper-case identifiers yield the "x-"
 * names, the lower-case ones the registered names.
 *
 * The parameter type is declared explicitly; an undeclared K&R
 * parameter relies on implicit int, which C99 and later do not allow.
 */
static char *codename(code)
	int code;
{
	switch( code ){
		case CC_JIS:  return "iso-2022-jp";
		case CC_EUC:  return "x-euc-jp";
		case CC_euc:  return "EUC-JP";
		case CC_SJIS: return "x-sjis";
		case CC_sjis: return "Shift_JIS";
		case CC_UTF8: return "utf-8";
		default:      break;
	}
	return NULL;
}
/* Charset name (as given by codename()) of the current to-client output
 * encoding of Conn; NULL when CTX_cur_codeconvCL() reports no conversion
 * or an encoding that codename() has no name for. */
#define CTX_codeconv_charset(Conn) codename(CTX_cur_codeconvCL(Conn,NULL))

/* Output buffer size used for `sz' bytes of input when converting to
 * JIS, UTF-8 or FIX (see codeconv_bufsize()): twice the input plus 1000.
 * The argument is parenthesized so that expressions such as
 * JIS_BUFSIZ(a + b) expand correctly. */
#define JIS_BUFSIZ(sz)	((sz) * 2 + 1000)

/* One code conversion entry; filled in by scan_CODECONV(). */
typedef struct {
	char	*cv_what; /* realm of the conversion */
	char	*cv_spec; /* conversion spec. source */
	int	 cv_occe; /* output charcode encoding */
	char	*cv_occs; /* output charcode spec. */
	int	 cv_icce; /* input charcode encoding */
	char	*cv_iccs; /* input charcode spec. */
} Conv;
/* Conversion state of this module; allocated by minit_textconv(). */
typedef struct {
	Conv	te_CodeConv[32]; /* conversion table, slot 0 is Conv0 */
	Conv	te_cv0; /* scratch entry built by convenv() from DELEGATE_FLAGS */
} TextConvEnv;
static TextConvEnv *textConvEnv;
/* Shorthands; both dereference textConvEnv, so minit_textconv() must
 * have run before they are used. */
#define CodeConv textConvEnv->te_CodeConv
#define cv0	textConvEnv->te_cv0
/* Default entry: output passed through (CC_THRU), any input (CC_ANY). */
static Conv Conv0 = {"*","THRU",CC_THRU,"=",CC_ANY,"*"};
/*
 * minit_textconv -- set up the conversion environment.
 * The first call allocates the TextConvEnv and stores the default
 * pass-through entry (Conv0) in table slot 0; later calls do nothing.
 */
minit_textconv()
{
	if( !textConvEnv ){
		textConvEnv = NewStruct(TextConvEnv);
		textConvEnv->te_CodeConv[0] = Conv0;
	}
}

/* Initial value of CodeConv_X: one table slot (slot 0, the default
 * entry) is in use before any spec has been scanned. */
#define CONVX0	1
/* Both counters are incremented by scan_CODECONV(), depending on its
 * `local' argument. */
int CodeConv_X = CONVX0;	/* global */
int CodeConv_x;		/* local to connection */
/* Number of table slots in use, i.e. the index of the next free slot. */
#define CONVX	(CodeConv_X + CodeConv_x)

/*
 * scan_CHARCODE1 -- register one CHARCODE item as a to-client conversion.
 *
 *   charcode  "incode/outcode", "outcode.incode", or a bare "outcode"
 *             (the input code then defaults to "JP")
 *   local     passed on to scan_CODECONV(); non-zero counts the entry
 *             as local to the connection
 *
 * The item is rewritten as "outcode.incode" and handed to
 * scan_CODECONV() with the CCV_TOCL realm.  Always returns 0.
 *
 * The work buffers are 256 bytes; an item that would not fit is logged
 * and ignored instead of being copied past the end of them.
 */
static int scan_CHARCODE1(charcode,local)
	char *charcode;
	int local;
{	char ccb[256],*incode,*outcode,*dp;
	char spec[256];

	/* the longest rewrite appends ".JP" to the item */
	if( sizeof(spec) <= strlen(charcode) + 3 ){
		sv1log("## ignored too long CHARCODE (%d bytes)\n",
			(int)strlen(charcode));
		return 0;
	}

	strcpy(ccb,charcode);
	if( dp = strchr(ccb,'/') ){
		/* "incode/outcode" */
		*dp++ = 0;
		outcode = dp;
		incode = ccb;
	}else
	if( dp = strchr(ccb,'.') ){
		/* "outcode.incode" */
		*dp++ = 0;
		incode = dp;
		outcode = ccb;
	}else{
		incode = "JP";
		outcode = ccb;
	}

	sprintf(spec,"%s.%s",outcode,incode);
	scan_CODECONV(spec,CCV_TOCL,local);
	return 0;
}
/*
 * scan_CHARCODE -- register the items of a CHARCODE list.
 *
 *   Conn       not used here
 *   charcodes  list text, passed to scan_commaList() together with
 *              scan_CHARCODE1() as the per-item handler
 *              (presumably a comma separated list -- see scan_commaList)
 */
scan_CHARCODE(Conn,charcodes)
	Connection *Conn;
	char *charcodes;
{
	scan_commaList(charcodes, 0, scan_CHARCODE1, 0);
}

/*
 * scan_codename -- copy the leading code name of `str' into `name'.
 *
 * A code name is the longest prefix made of letters, digits, '-'
 * and '_'.
 *
 *   str   NUL-terminated text to scan
 *   name  output buffer of at least 128 bytes (every caller in this
 *         file passes a char[128])
 *
 * `name' is always left NUL-terminated: it is cleared first, because
 * sscanf() stores nothing when `str' does not start with a code-name
 * character, and the copy is limited to 127 characters.
 * Always returns 0.
 */
static int scan_codename(str,name)
	char *str,*name;
{
	name[0] = 0;
	sscanf(str,"%127[-_a-zA-Z0-9]",name);
	return 0;
}
/*
 * scan_CODECONV -- parse one conversion spec and append it to the table.
 *
 *   spec   "outcode[.incode]"; the input part may start with "cc="
 *          and defaults to "jp" when absent
 *   what   realm of the conversion; the pointer is stored, not copied
 *   local  non-zero counts the new entry in CodeConv_x (local to the
 *          connection), zero counts it in CodeConv_X (global)
 *
 * Always returns 0.  A spec equal to the one in the last used slot is
 * ignored, and so is any spec once all slots of the table are in use.
 *
 * Output encodings are recognized by name (case-insensitive); "euc" and
 * "sjis" select the lower-case variants CC_euc / CC_sjis.  Any other
 * output name means CC_THRU.  The input encoding is CC_HTML for "html"
 * and CC_JIS for everything else.
 */
int scan_CODECONV(spec,what,local)
	char *spec,*what;
	int local;
{	Conv *cv;
	char *dp,*np,outcx[128],outcs[128],incx[128],incs[128];
	int oc,ic;

	if( 0 < CONVX && strcmp(CodeConv[CONVX-1].cv_spec,spec) == 0 ){
		sv1log("## ignored the same [%d] CODECONV=%s\n",CONVX,spec);
		return 0;
	}

	/* never index outside of the fixed size table */
	if( CONVX < 0
	 || (int)(sizeof(CodeConv)/sizeof(CodeConv[0])) <= CONVX ){
		sv1log("## ignored CODECONV=%s, table is full [%d]\n",
			spec,CONVX);
		return 0;
	}

	cv = &CodeConv[CONVX];

	/* sscanf() stores nothing if spec starts with '.', so start with
	 * empty strings; every scan is limited to the buffer size */
	outcx[0] = 0;
	outcs[0] = 0;
	sscanf(spec,"%127[^.]",outcx);
	/* a code name never contains '.', thus scanning the bounded copy
	 * finds the same name as scanning spec itself */
	scan_codename(outcx,outcs);

	strcpy(incs,"jp");
	strcpy(incx,"jp");
	if( dp = strchr(spec,'.') ){
		dp++;
		if( strncmp(dp,"cc=",3) == 0 )
			dp += 3;

		if( dp[0] ){
			sscanf(dp,"%127[^.]",incx);
			np = incx;
			if( strncmp(np,"cc=",3) == 0 )
				np += 3;
			scan_codename(np,incs);
		}
	}

	cv->cv_what = what;
	cv->cv_spec = stralloc(spec);
	cv->cv_occs = stralloc(outcx);
	cv->cv_iccs = stralloc(incx);

	if( strcaseeq(outcs, "html") )	oc = CC_HTML;	else
	if( strcaseeq(outcs, "fix" ) )  oc = CC_FIX;	else
	if( strcaseeq(outcs, "jp"  ) )  oc = CC_JIS;	else
	if( strcaseeq(outcs, "iso-2022-jp")) oc = CC_JIS; else
	if( strcaseeq(outcs, "jis" ) )  oc = CC_JIS;	else
	if( strcaseeq(outcs, "x-euc-jp")) oc = CC_EUC;	else
	if( strcaseeq(outcs, "euc-jp")) oc = CC_euc;	else
	if( strcaseeq(outcs, "euc" ) )  oc = CC_euc;	else
	if( strcaseeq(outcs, "shift_jis")) oc = CC_sjis; else
	if( strcaseeq(outcs, "x-sjis") )  oc = CC_SJIS;	else
	if( strcaseeq(outcs, "sjis") )  oc = CC_sjis;	else
	if( strcaseeq(outcs, "utf8") )  oc = CC_UTF8;	else
					oc = CC_THRU;

	cv->cv_occe = oc;

	if( strcaseeq(incs, "html") )	ic = CC_HTML;	else
					ic = CC_JIS;
	cv->cv_icce = ic;

	sv1log("CODECONV[%d](%s,%s,%s) => %s.%s [%s]\n",CONVX,
		local?"local":"global",what,spec, outcx, incx,
		codename(oc)?codename(oc):"?");

	/* the entry becomes visible by advancing the matching counter */
	if( local )
		CodeConv_x++;
	else	CodeConv_X++;
	return 0;
}

static convenv(Conn,what,cvv)
	Connection *Conn;
	char *what;
	Conv *cvv[];
{	Conv *cv;
	int cvi,cvj,ncv;

	ncv = 0;

	/* for backward compatiblity */
	if( CONVX == CONVX0 ){
		int ic,oc;

		ic = oc = 0;
		if( strchr(DELEGATE_FLAGS,'e') ) oc = CC_euc;  else
		if( strchr(DELEGATE_FLAGS,'E') ) oc = CC_EUC;  else
		if( strchr(DELEGATE_FLAGS,'s') ) oc = CC_sjis; else
		if( strchr(DELEGATE_FLAGS,'S') ) oc = CC_SJIS; else
		if( strchr(DELEGATE_FLAGS,'J') ) oc = CC_JIS;

		if( ic != 0 || oc != 0 ){
			cv0 = CodeConv[0];
			if( ic ){ cv0.cv_icce = ic; }
			if( oc ){ cv0.cv_occe = oc; }
			cvv[ncv++] = &cv0;
		}
	}

	for( cvi = CONVX-1; 0 <= cvi; cvi-- ){
		cv = &CodeConv[cvi];
		if( cv->cv_occe == CC_THRU && 0 < ncv )
			break;

		for( cvj = 0; cvj < ncv; cvj++ )
			if( cvv[cvj]->cv_icce == cv->cv_icce )
				goto NEXT;

		cvv[ncv++] = cv;
		if( cv->cv_occe == CC_THRU )
			break;
	NEXT:;
	}

	return ncv;
}

/*
 * plain2html -- report the PLAIN_TO_HTML_PRE setting as a boolean:
 * 1 when it is non-zero, 0 otherwise.
 */
plain2html()
{
	return PLAIN_TO_HTML_PRE ? 1 : 0;
}

/*
 * codeconvs -- report the conversions currently in effect.
 *
 *   Conn   passed on to convenv()
 *   what   not used; the CCV_TOCL realm is always asked for
 *   cvenv  if not NULL, receives the selected conversions as a comma
 *          separated list of "outspec.inspec" items, written in the
 *          reverse of the order convenv() selected them in.  No size
 *          limit is applied: the caller must supply a buffer large
 *          enough for all items.
 *
 * Returns the output encoding (CC_*) of the first selected conversion,
 * or 0 when convenv() selected none; in that case cvv[0] holds no valid
 * pointer and must not be dereferenced.
 */
static int codeconvs(Conn,what,cvenv)
	Connection *Conn;
	char *what,*cvenv;
{	Conv *cvv[16],*cv;
	int ncv,cvi;
	char *sp;

	ncv = convenv(Conn,CCV_TOCL,cvv);
	if( cvenv ){
		sp = cvenv;
		*sp = 0;
		for( cvi = ncv-1; 0 <= cvi; cvi-- ){
			cv = cvv[cvi];
			sprintf(sp,"%s.%s",cv->cv_occs,cv->cv_iccs);
			sp += strlen(sp);
			if( 0 < cvi ){
				sprintf(sp,",");
				sp += strlen(sp);
			}
		}
	}
	if( ncv <= 0 )
		return 0;
	return cvv[0]->cv_occe;
}

/*
 * CTX_cur_codeconvCL -- current to-client output encoding.
 *
 *   Conn   passed on to codeconvs()
 *   cvenv  optional buffer for the textual list of conversions
 *          (see codeconvs()); may be NULL
 *
 * Returns the CC_* identifier of the output encoding, or 0 when there
 * is none or when it is the pass-through encoding (CC_THRU).
 */
CTX_cur_codeconvCL(Conn,cvenv)
	Connection *Conn;
	char *cvenv;
{	int outcode;

	outcode = codeconvs(Conn,CCV_TOCL,cvenv);
	return (outcode == CC_THRU) ? 0 : outcode;
}

/*
 * CTX_codeconv_line -- convert one line for the client.
 *
 *   src, dst, ctype  passed on to CTX_line_codeconv()
 *   repair           once selected a setup for repairing broken JIS
 *                    codes; that is obsolete, both settings convert
 *                    the same way
 *
 * No value is returned.
 */
CTX_codeconv_line(Conn,src,dst,ctype,repair)
	Connection *Conn;
	char *src,*dst,*ctype;
{
	CTX_line_codeconv(Conn,src,dst,ctype);
}

/*
 * CTX_line_codeconv -- apply the to-client conversions to a string.
 *
 *   Conn   passed on to convenv()
 *   src    NUL-terminated input
 *   dst    output buffer; may be the same pointer as src (in-place).
 *          Its size is not known here -- see codeconv_bufsize() for
 *          the size a conversion may need.
 *   ctype  passed on to the converters
 *
 * No value is returned.
 *
 * With suppress_codeconv set the input is passed through unchanged.
 * For in-place use nothing is copied then: strcpy() must not be given
 * overlapping objects.
 *
 * Otherwise every conversion selected by convenv() is applied in turn,
 * each one reading the output of the previous one.  Whenever input and
 * output are the same buffer the input is duplicated first, because the
 * converters are called with distinct buffers only.
 */
CTX_line_codeconv(Conn,src,dst,ctype)
	Connection *Conn;
	char *src,*dst,*ctype;
{	Conv *cvv[16],*cv;
	int ncv,cvi;
	char *tmp;

	if( suppress_codeconv ){
		if( src != dst )
			strcpy(dst,src);
		return;
	}
	if( src != dst )
		dst[0] = 0;

	ncv = convenv(Conn,CCV_TOCL,cvv);
	for( cvi = 0; cvi < ncv; cvi++ ){
		cv = cvv[cvi];
		tmp = NULL;
		if( src == dst )
			src = tmp = stralloc(src);

		switch( cv->cv_occe ){
		case CC_FIX:  FIX_2022(src,dst,ctype);break;
		case CC_euc:
		case CC_EUC:  TO_EUC( src,dst,ctype); break;
		case CC_sjis:
		case CC_SJIS: TO_SJIS(src,dst,ctype); break;
		case CC_JIS:  TO_JIS( src,dst,ctype); break;
		case CC_UTF8: TO_UTF8( src,dst,ctype);break;
		default:      strcpy( dst,src);       break;
		}

		if( tmp != NULL )
			free(tmp);
		/* the next conversion works on this output */
		src = dst;
	}
}

/*
 * CTX_do_codeconv -- convert src into dst as selected by ccode.
 *
 *   ccode  CCV_TOCL: apply the configured to-client conversions;
 *          CCV_TOSV: convert to JIS;
 *          (both are recognized by pointer identity, not by content)
 *          otherwise its first character is taken as a CC_* identifier
 *          naming the output encoding; unknown identifiers copy the
 *          input unchanged
 *   src, dst, ctype  passed on to the converter
 *
 * No value is returned.
 */
CTX_do_codeconv(Conn,ccode,src,dst,ctype)
	Connection *Conn;
	char *ccode,*src,*dst,*ctype;
{
	if( ccode == CCV_TOCL ){
		CTX_line_codeconv(Conn,src,dst,ctype);
	}else if( ccode == CCV_TOSV ){
		TO_JIS(src,dst,ctype);
	}else{
		int outcode = ccode[0];

		if( outcode == CC_FIX )
			FIX_2022(src,dst,ctype);
		else if( outcode == CC_euc || outcode == CC_EUC )
			TO_EUC(src,dst,ctype);
		else if( outcode == CC_sjis || outcode == CC_SJIS )
			TO_SJIS(src,dst,ctype);
		else if( outcode == CC_JIS )
			TO_JIS(src,dst,ctype);
		else if( outcode == CC_UTF8 )
			TO_UTF8(src,dst,ctype);
		else
			strcpy(dst,src);
	}
}

/*
 * codeconv_bufsize -- output buffer size for converting `size' bytes.
 *
 *   ccode  CC_* identifier of the output encoding
 *   size   size of the input
 *
 * Returns JIS_BUFSIZ(size) for CC_FIX, CC_JIS and CC_UTF8, and `size'
 * itself for the EUC and SJIS variants and for any other value.
 */
codeconv_bufsize(ccode,size)
{
	if( ccode == CC_FIX || ccode == CC_JIS || ccode == CC_UTF8 )
		return JIS_BUFSIZ(size);

	return size;
}

/*
 * CTX_check_codeconv -- tell whether a to-client conversion is active.
 *
 *   Conn   passed on to CTX_cur_codeconvCL()
 *   dolog  non-zero: log the list of active conversions
 *
 * Returns the CC_* identifier of the output encoding, or 0 when no
 * conversion is active.
 *
 * The list is written by codeconvs() with no size limit, as
 * "outspec.inspec" items joined by commas.  The former 128 byte buffer
 * could not even hold one item built from two specs of the maximal
 * length scan_CODECONV() scans into its 128 byte buffers, so room for
 * several of them is provided.
 * NOTE(review): this only widens the margin; a real bound needs a size
 * argument in codeconvs().
 */
int CTX_check_codeconv(Conn,dolog)
	Connection *Conn;
	int dolog;
{	char cvenv[1024];
	int tocl;

	tocl = CTX_cur_codeconvCL(Conn,cvenv);
	if( tocl == 0 )
		return 0;

	if( dolog ){
		sv1log("Code Conversion [CHARCODE=%s]\n",cvenv);
	}
	return tocl;
}

/*
 * CTX_codeconv_get -- query the current to-client conversion.
 *
 *   Conn      passed on to CTX_cur_codeconvCL()
 *   ctype     not used here
 *   xcharset  if not NULL, receives the charset name of the output
 *             encoding (see CTX_codeconv_charset), or a null pointer
 *             when no conversion is active
 *   p2h       if not NULL, receives plain2html(), or 0 when no
 *             conversion is active
 *
 * Returns 1 when a conversion is active, 0 otherwise.
 */
CTX_codeconv_get(Conn,ctype,xcharset, p2h)
	Connection *Conn;
	char *ctype;
	char **xcharset;
	int *p2h;
{	int active;

	active = CTX_cur_codeconvCL(Conn,NULL) ? 1 : 0;

	if( xcharset )
		*xcharset = active ? CTX_codeconv_charset(Conn) : (char*)0;
	if( p2h )
		*p2h = active ? plain2html() : 0;

	return active;
}
/*
 * CTX_codeconv_set -- change the conversion settings.
 *
 *   Conn      not used here
 *   enable    -1: leave as is; 0: suppress conversion; otherwise
 *             enable conversion
 *   charcode  if not NULL, a CHARCODE item registered as a global
 *             conversion via scan_CHARCODE1()
 *   p2h       -1: leave as is; otherwise the new PLAIN_TO_HTML_PRE
 *
 * Returns 1 if conversion was enabled before the call, 0 if it was
 * suppressed.
 */
CTX_codeconv_set(Conn,enable,charcode,p2h)
	Connection *Conn;
	char *charcode;
{	int was_suppressed;

	was_suppressed = suppress_codeconv;

	if( enable != -1 )
		suppress_codeconv = enable ? 0 : 1;

	if( charcode != NULL )
		scan_CHARCODE1(charcode,0);

	if( p2h != -1 )
		PLAIN_TO_HTML_PRE = p2h;

	return was_suppressed ? 0 : 1;
}

/*
 * CTX_check_codeconvSP -- CTX_check_codeconv() restricted to the cases
 * where one of BORN_SPECIALIST, ACT_SPECIALIST or ACT_TRANSLATOR holds.
 * Returns 0 without checking anything in all other cases.
 */
CTX_check_codeconvSP(Conn,dolog)
	Connection *Conn;
{
	if( !(BORN_SPECIALIST || ACT_SPECIALIST || ACT_TRANSLATOR) )
		return 0;

	return CTX_check_codeconv(Conn,dolog);
}

extern char *fgetsTIMEOUT();
/*
 * CCV_relay_texts -- relay text line by line, converting it on the way.
 *
 *   Conn  connection the conversion settings are taken from
 *   ins   NULL-terminated array of input streams, read one after
 *         another
 *   out   stream the (converted) lines are written to
 *   dup   if not NULL, also receives every line, unconverted
 *
 * Returns the number of input bytes relayed.
 *
 * Conversion is done by CCXexec() when CCXactive(CCX_TOCL) holds,
 * else by CTX_line_codeconv() when CTX_check_codeconvSP() reports an
 * active conversion; otherwise lines are copied as they are.
 *
 * Input without a line end is kept pending and joined with what is
 * read next, also across the boundary between two input streams.
 *
 * The conversion buffer is sized with JIS_BUFSIZ(): CTX_line_codeconv()
 * is not told the size of its output buffer, and per that macro the
 * converted text may need up to twice the input plus 1000 bytes, which
 * did not fit into a buffer as large as the line buffer.
 */
int CCV_relay_texts(Conn,ins,out,dup)
	Connection *Conn;
	FILE *ins[],*out,*dup;
{	char *rs,line[4096];
	char xline[JIS_BUFSIZ(sizeof(line))];
	int totalc;
	int do_conv,do_ccx;
	FILE *in;
	int inx,pending;

	if( CCXactive(CCX_TOCL) ){
		do_conv = 1;
		do_ccx = 1;
	}else{
		do_conv = CTX_check_codeconvSP(Conn,1);
		do_ccx = 0;
	}
	totalc = 0;

	inx = 0;
	if( in = ins[inx] )
		inx++;
	pending = 0;

	while( in != NULL ){
		/* nothing to read within 100 (presumably msec. -- see
		 * fPollIn): send out what has been written so far */
		if( fPollIn(in,100) == 0 )
			fflushTIMEOUT(out);

		rs = fgetsTIMEOUT(line+pending,sizeof(line)-pending,in);
		if( rs == NULL ){
			/* this input is finished: go on with the next one,
			 * keeping the pending partial line */
			if( in = ins[inx] ){
				inx++;
				continue;
			}
			/* no more input: relay the pending text, if any;
			 * `in' is NULL now, so the loop ends after that */
			if( pending == 0 )
				break;
		}else{
			/* something was read but the line is not complete
			 * yet: read on.  An empty read (when the buffer has
			 * no room left, assuming fgets() semantics) falls
			 * through and flushes the buffer as it is. */
			if( line[pending] != 0 ) /* line buff. is not full */
			if( strpbrk(line,"\r\n") == 0 ){
				pending = strlen(line);
				continue;
			}
		}

		pending = 0;
		totalc += strlen(line);
		if( do_conv ){
			if( do_ccx )
				CCXexec(CCX_TOCL,line,strlen(line),xline,sizeof(xline));
			else	CTX_line_codeconv(Conn,line,xline,"*/*");
			fputs(xline,out);
		}else	fputs(line,out);

		/* after a write error reading goes on only when there is
		 * a second destination to feed */
		if( ferror(out) && dup == NULL ){
			sv1log("## CCX_relay_texts: write error & no-cache\n");
			break;
		}

		if( dup )
			fputs(line,dup);
	}
	return  totalc;
}
/*
 * CCV_relay_text -- CCV_relay_texts() for a single input stream.
 * Returns what CCV_relay_texts() returns.
 */
CCV_relay_text(Conn,in,out,dup)
	Connection *Conn;
	FILE *in,*out,*dup;
{	FILE *inputs[2];

	/* NULL-terminated list holding just `in' */
	inputs[1] = NULL;
	inputs[0] = in;

	return CCV_relay_texts(Conn,inputs,out,dup);
}
/*
 * CCV_relay_textX -- relay `in' to `out' converting as given by ccspec.
 *
 *   ccspec  CHARCODE list, scanned with scan_CHARCODE()
 *
 * Works on a temporary Connection of its own.  Both conversion counters
 * are reset first, so the conversions scanned before this call are no
 * longer in effect afterwards.
 * Returns what CCV_relay_text() returns.
 */
CCV_relay_textX(ccspec,in,out)
	char *ccspec;
	FILE *in,*out;
{	Connection ConnBuf;
	Connection *Conn;

	Conn = &ConnBuf;
	ConnInit(Conn);

	/* start again from the default entry only */
	CodeConv_X = CONVX0;
	CodeConv_x = 0;
	scan_CHARCODE(Conn,ccspec);

	/* lets CTX_check_codeconvSP() accept the conversion */
	ACT_TRANSLATOR = 1;
	return CCV_relay_text(Conn,in,out,NULL);
}


extern void *CCXnew();
void *ccx_global;

/*
 * setCCX -- replace the CCX (as named by the CCX macro) by one for the
 * given output code.
 *
 *   Conn  connection (presumably what the CCX macro refers to --
 *         TODO confirm)
 *   code  output code; the first character selects it, regardless of
 *         case: U, J, K, S, E or T.  An empty string is overwritten
 *         with "j", so the buffer must be writable.
 *   stat  receives a short text describing the selected code
 *
 * No value is returned.
 *
 * The current CCX, if any, is released, and ccx_global is reset when
 * it pointed to that CCX.  A new CCX is created unless "through" was
 * selected.  This is done for an unknown code too; `stat' then tells
 * the codes to select from, which include U as it is accepted above.
 */
setCCX(Conn,code,stat)
	Connection *Conn;
	char *code,*stat;
{	char *st;
	int thru = 0;

	if( code[0] == 0 )
		strcpy(code,"j");

	switch( code[0] ){
		case 'u': case 'U': st = ": UTF-8"; break;
		case 'j': case 'J': st = ": JIS7 (ISO-2022-JP)"; break;
		case 'k': case 'K': st = ": JIS7 + 7bit/1byte-Kana"; break;
		case 's': case 'S': st = ": Shift_JIS"; break;
		case 'e': case 'E': st = ": EUC-JP"; break;
		case 't': case 'T': st = ": Through (No conversion)";
			thru = 1;
			break;
		default : st = "? Unknown. Select one of U,J,K,S,E or T";
			break;
	}
	if( CCX != NULL ){
		/* do not leave a dangling pointer behind */
		if( ccx_global == CCX )
			ccx_global = NULL;
		free(CCX);
		CCX = NULL;
	}
	if( !thru )
		CCX = CCXnew("*",code);

	strcpy(stat,st);
}
/*
 * global_setCCX -- setCCX(), and remember the resulting CCX in
 * ccx_global (NULL after selecting "through").
 */
global_setCCX(Conn,code,stat)
	Connection *Conn;
	char *code,*stat;
{
	setCCX(Conn, code, stat);

	ccx_global = CCX;
}

/*
 * for MIMEKit library...
 */
/* Connection used by the codeconv_*() functions, which take no
 * Connection argument. */
static Connection *textconvCTX;

/*
 * set_textconvCTX -- make Conn the connection used by codeconv_*().
 */
set_textconvCTX(Conn)
	Connection *Conn;
{
	textconvCTX = Conn;
}
/*
 * get_textconvCTX -- the connection used by codeconv_*().
 * When none has been set, a new Connection is allocated with
 * NewStruct() and kept for later calls.
 */
Connection *get_textconvCTX()
{
	if( textconvCTX != NULL )
		return textconvCTX;

	textconvCTX = NewStruct(Connection);
	return textconvCTX;
}
codeconv_set(enable,charcode,p2h)
	char *charcode;
{
	return CTX_codeconv_set(get_textconvCTX(),enable,charcode,p2h);
}
codeconv_get(ctype,xcharset, p2h)
	char *ctype;
	char **xcharset;
	int *p2h;
{
	return CTX_codeconv_get(get_textconvCTX(),ctype,xcharset, p2h);
}
/*
 * codeconv_line -- CTX_codeconv_line() on the connection given by
 * get_textconvCTX().
 *
 *   src, dst, ctype, repair  as for CTX_codeconv_line()
 *
 * Always returns 0.  CTX_codeconv_line() ends without returning a
 * value, so its result must not be passed on to the caller.
 */
int codeconv_line(src,dst,ctype,repair)
	char *src,*dst,*ctype;
	int repair;
{
	CTX_codeconv_line(get_textconvCTX(),src,dst,ctype,repair);
	return 0;
}
