/*
 * Copyright (c) 1999 Sun Microsystems, Inc.
 * Copyright (c) 1999 Nihon Sun Microsystems K.K.
 * All rights reserved.
 */

/*
 * "$Id: ct_utf16.c,v 1.2 1999/05/24 11:48:52 kasha Exp $"
 */

#pragma ident	"@(#)ct_utf16.c 1.2	99/05/24 SMI"


#if defined(CSC_CT_UTF8W)
#  define CSC_CT_UTF8	1
#endif /* CSC_CT_UTF8W */

#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#if defined(CSC_CT_UTF8W)
#include <locale.h>
#include <wchar.h>
#endif /* CSC_CT_UTF8W */


#include "csconv.h"
#include "utf16_ct_map.h"
#if defined(CSC_CT_UTF8)
#include "csc_utf.h"
#endif /* CSC_CT_UTF8 */


#define	MAXVAL_14BIT		((16 * 1024) - 1)
#define	ERROR_BREAK(err)	errno = (err);		\
				ret_val = (size_t)(-1);	\
				break
#undef	CSC_KEEP_MAP_CURRENT

#if !defined(INTERIM_ENCODING)
#define INTERIM_ENCODING	"UTF-8"
#endif /* !INTERIM_ENCODING */
#if !defined(INTERIM_ENCODING_DELIM)
#define INTERIM_ENCODING_DELIM '%'
#endif /* !INTERIM_ENCODING_DELIM */


/*
 * State allocated by the open routine in this file and released by the
 * close routine.  Presumably this is the structure that the csconv_t
 * handle type (declared in csconv.h) points to -- confirm there.
 */
struct _csconv_info {
	/* Set to 1 when the handle is created; not read anywhere in this file. */
	int		active;
#if defined(CSC_CT_UTF8W)
	/*
	 * Heap copy (strdup) of the locale name given to the open routine.
	 * The conversion routine switches LC_CTYPE to it while it runs, and
	 * the close routine frees it.
	 */
	char *		locale;
#endif /* CSC_CT_UTF8W */
};


extern const csc_utf16_ct_map_t * csc_national_utf16_map[];

static int
extended_segment_conv(
	const csc_utf16_ct_map_t *	map,
	const unsigned char **		inbuf,
	size_t *			inbytesleft,
	unsigned char **		outbuf,
	size_t *			outbytesleft
);


#if defined(CSC_CT_UTF8W)
#  define MB_BUF_LEN		(32)
#endif /* CSC_CT_UTF8W */

#if defined(CSC_CT_UTF8W)
#  define ct_utf16_open		ct_utf8w_open
#  define ct_utf16_close	ct_utf8w_close
#  define ct_utf16_conv		ct_utf8w_conv
#else /* !CSC_CT_UTF8W */
#if defined(CSC_CT_UTF8)
#  define ct_utf16_open		ct_utf8_open
#  define ct_utf16_close	ct_utf8_close
#  define ct_utf16_conv		ct_utf8_conv
#endif /* CSC_CT_UTF8 */
#endif /* !CSC_CT_UTF8W */


/*
 * Create a conversion handle.
 *
 * locale	Only used by the CSC_CT_UTF8W build, which keeps a private
 *		copy of the string in the handle.
 * tocode	Not examined by this routine.
 * fromcode	Not examined by this routine.
 *
 * Returns the new handle, or NULL with errno set to ENOMEM when either the
 * handle or (CSC_CT_UTF8W only) the locale copy cannot be allocated.
 */
csconv_t
ct_utf16_open(
	const char *	locale,
	const char *	tocode,
	const char *	fromcode)
{
	csconv_t	handle;

	handle = malloc(sizeof (*handle));
	if (NULL == handle) {
		errno = ENOMEM;
		return NULL;
	}

#if defined(CSC_CT_UTF8W)
	handle->locale = strdup(locale);
	if (NULL == handle->locale) {
		/* Do not leak the half-built handle. */
		free(handle);
		errno = ENOMEM;
		return NULL;
	}
#endif /* CSC_CT_UTF8W */

	handle->active = 1;

	return handle;
}


/*
 * Destroy a handle obtained from the open routine.  A NULL handle is
 * accepted and ignored.  In the CSC_CT_UTF8W build the locale string owned
 * by the handle is released first.
 */
void
ct_utf16_close(csconv_t cd)
{
	if (NULL != cd) {
#if defined(CSC_CT_UTF8W)
		free(cd->locale);
#endif /* CSC_CT_UTF8W */
		free(cd);
	}
}


/*
 * Convert ESC-designated input (ISO 2022 style; presumably X Compound Text,
 * going by the file name) into the output form selected at build time:
 *
 *   default build	two bytes per character, high byte first;
 *   CSC_CT_UTF8	whatever UTF16_UTF8() from csc_utf.h emits;
 *   CSC_CT_UTF8W	one wchar_t per character, obtained through mbtowc()
 *			while LC_CTYPE is temporarily switched to the locale
 *			stored in the handle.
 *
 * cd		Handle from the open routine.
 * inbuf	In/out: address of the input cursor.
 * inbytesleft	In/out: number of input bytes remaining.
 * outbuf	In/out: address of the output cursor.
 * outbytesleft	In/out: number of output bytes remaining.
 *
 * The four cursors/counters are written back on every exit that gets past
 * the argument checks, including error exits.  No designation state is kept
 * in the handle: every call starts with no character set selected, and
 * bytes seen before the first designation are copied through unchanged.
 *
 * Returns 0 on success (also when inbuf or *inbuf is NULL), or (size_t)(-1)
 * with errno set:
 *   EBADF	cd is NULL, a map declares a code length other than 1 or 2,
 *		or (CSC_CT_UTF8W) the handle's locale cannot be selected;
 *   ENOMEM	(CSC_CT_UTF8W) the current locale name cannot be saved;
 *   E2BIG	not enough room in the output buffer;
 *   EINVAL	the input ends in the middle of a character;
 *   EILSEQ	an ESC sequence matches none of the known designations;
 *   other	whatever errno extended_segment_conv() reported.
 */
size_t
ct_utf16_conv(
	csconv_t	cd,
	const char **	inbuf,
	size_t *	inbytesleft,
	char **		outbuf,
	size_t *	outbytesleft)
{
	size_t				ret_val;
	const unsigned char *		ip;
	size_t				ileft;
	unsigned char *			op;
	size_t				oleft;
	unsigned short			uc;
	const csc_utf16_ct_map_t **	map;
	const csc_utf16_ct_map_t *	map_current;
	unsigned int			high;
	unsigned int			low;
	int				len;
	int				ret_errno;
#if defined(CSC_CT_UTF8W)
	char *				locale;
	unsigned char			mb_buf[MB_BUF_LEN];
	unsigned char *			mb;
	size_t				mb_buf_len;
	unsigned char *			op1;
	size_t				oleft1;
	char *				p;
	wchar_t				wc;
	int				saved_errno;
#endif /* CSC_CT_UTF8W */

	ret_val = 0;

#if defined(CSC_CT_UTF8W)
	locale = NULL;
	mb = mb_buf;
	mb_buf_len = (sizeof (mb_buf));
#endif /* CSC_CT_UTF8W */

	if (NULL == cd) {
		errno = EBADF;
		return (size_t)(-1);
	}

	if ((NULL == inbuf) || (NULL == *inbuf)) {
		return 0;
	}

#if defined(CSC_CT_UTF8W)
	/*
	 * mbtowc()/mblen() below depend on LC_CTYPE.  If the process is not
	 * already running in the handle's locale, remember the current one
	 * (a private copy, since setlocale() may reuse its buffer) and switch.
	 */
	p = setlocale(LC_CTYPE, NULL);
	if (0 != strcmp(p, cd->locale)) {
		locale = strdup(p);
		if (NULL == locale) {
			errno = ENOMEM;
			return (size_t)(-1);
		}
		if (NULL == setlocale(LC_CTYPE, cd->locale)) {
			free(locale);
			errno = EBADF;
			return (size_t)(-1);
		}
	}
#endif /* CSC_CT_UTF8W */

	map_current = NULL;
	ret_errno = 0;

	ip = (unsigned char *)(*inbuf);
	ileft = *inbytesleft;
	op = (unsigned char *)(*outbuf);
	oleft = *outbytesleft;

	while (0 < ileft) {
		if (0x1b != *(ip)) {
			/* Nothing designated yet: copy the byte through. */
			if (NULL == map_current) {
				if (0 < oleft) {
					*(op++) = *(ip++);
					--ileft;
					--oleft;
					continue;
				} else {
					ERROR_BREAK(E2BIG);
				}
			}

			/*
			 * TAB and LF are emitted as themselves, bypassing the
			 * map, except inside an extended-segment character
			 * set where every byte is data.
			 */
			if ((1 != map_current->extended_segment) &&
			    ((0x09 == *ip) || (0x0a == *ip))) {
#if defined(CSC_CT_UTF8)
#  if defined(CSC_CT_UTF8W)
				/* A buffer with room for exactly one wchar_t is enough. */
				if (oleft < (sizeof (wchar_t))) {
					ERROR_BREAK(E2BIG);
				}
				/*
				 * Convert into an aligned local and copy it out;
				 * op itself carries no alignment guarantee.
				 */
				if (1 == mbtowc(&wc, (const char *)ip, 1)) {
					memcpy(op, &wc, (sizeof (wc)));
					op += (sizeof (wchar_t));
					oleft -= (sizeof (wchar_t));
				}
				ip += 1;
#  else /* !CSC_CT_UTF8W */
				if (oleft <= 0) {
					ERROR_BREAK(E2BIG);
				}
				*(op++) = *(ip++);
				oleft -= 1;
#  endif /* !CSC_CT_UTF8W */
#else /* !CSC_CT_UTF8 */
				if (oleft <= 1) {
					ERROR_BREAK(E2BIG);
				}
				*(op++) = 0x00;
				*(op++) = *(ip++);
				oleft -= 2;
#endif /* !CSC_CT_UTF8 */
				ileft -= 1;
				continue;
			}

			/* Input must hold one whole character ... */
			if (ileft < map_current->in_code_length) {
				ERROR_BREAK(EINVAL);
			}

			/*
			 * ... and the output must have room for it.  This is
			 * checked before any input is consumed so that an
			 * E2BIG exit never drops a character.
			 */
#if defined(CSC_CT_UTF8)
#  if defined(CSC_CT_UTF8W)
			if (oleft < (sizeof (wchar_t))) {
				ERROR_BREAK(E2BIG);
			}
#  else /* !CSC_CT_UTF8W */
			/*
			 * NOTE(review): no check here; presumably
			 * UTF16_UTF8() guards the output space itself --
			 * confirm in csc_utf.h.
			 */
#  endif /* !CSC_CT_UTF8W */
#else /* !CSC_CT_UTF8 */
			if (oleft < map_current->out_code_length) {
				ERROR_BREAK(E2BIG);
			}
#endif /* !CSC_CT_UTF8 */

			/* Split the character into table row and column. */
			if (1 == map_current->in_code_length) {
				high = 0;
				low = *(ip++);
				--ileft;

			} else if (2 == map_current->in_code_length) {
				high = *(ip++);
				low = *(ip++);
				ileft -= 2;

			} else {
				ERROR_BREAK(EBADF);
			}

			/* A missing row maps to U+FFFD REPLACEMENT CHARACTER. */
			if (NULL == map_current->map[high]) {
				uc = 0xfffd;
			} else {
				uc = *((unsigned short *)
				       (map_current->map[high]) + low);
			}

#if defined(CSC_CT_UTF8)
#  if defined(CSC_CT_UTF8W)
			/*
			 * Go through a scratch multibyte buffer, then let the
			 * locale turn it into a wide character.
			 */
			op1 = mb;
			oleft1 = mb_buf_len;
			UTF16_UTF8(uc, len, op1, oleft1);
			if (len != mblen((const char *)mb, len)) {
				/* Not representable in this locale: skip it. */
				continue;
			}
			/*
			 * One wchar_t is produced whatever the multibyte
			 * length was, so the cursor always moves by
			 * sizeof (wchar_t); room was verified above.
			 */
			if (0 < mbtowc(&wc, (const char *)mb, len)) {
				memcpy(op, &wc, (sizeof (wc)));
				op += (sizeof (wchar_t));
				oleft -= (sizeof (wchar_t));
			}
#  else /* !CSC_CT_UTF8W */
			UTF16_UTF8(uc, len, op, oleft);
#  endif /* !CSC_CT_UTF8W */
#else /* !CSC_CT_UTF8 */
			*(op++) = (uc >> 8);
			*(op++) = (uc & 0x00ff);
			oleft -= 2;
#endif /* !CSC_CT_UTF8 */

			continue;
		}

		/*
		 * *ip is ESC: find the designation it introduces.  Ordinary
		 * maps are matched byte-for-byte against their designation
		 * string; extended-segment maps are handed to
		 * extended_segment_conv(), which also converts the data.
		 */
		for (map = csc_national_utf16_map; NULL != *map; map++) {
			if (1 == (*map)->extended_segment) {
				len = extended_segment_conv(*map,
							    &ip, &ileft,
							    &op, &oleft);
				if (len < 0) {
					/* Error: errno was set by the callee. */
					ret_errno = errno;
					break;
				} else if (0 == len) {
					/* Not this one; try the next map. */
					continue;
				} else {
					break;
				}
			} else {
				if (ileft < (*map)->desig_length) {
					continue;
				}
				if (0 == memcmp(ip, (*map)->desig,
						(*map)->desig_length)) {
					ip += (*map)->desig_length;
					ileft -= (*map)->desig_length;
					break;
				} else {
					continue;
				}
			}
		}

		if (0 != ret_errno) {
			ERROR_BREAK(ret_errno);
		} else if (NULL == *map) {
			/* Ran off the end of the table: unknown sequence. */
			ERROR_BREAK(EILSEQ);
		}

		map_current = (*map);
	}

	*inbuf = (const char *)ip;
	*inbytesleft = ileft;
	*outbuf = (char *)op;
	*outbytesleft = oleft;

#if defined(CSC_CT_UTF8W)
	if (NULL != locale) {
		/*
		 * Put the caller's locale back without disturbing the errno
		 * that is being reported.
		 */
		saved_errno = errno;
		setlocale(LC_CTYPE, locale);
		free(locale);
		errno = saved_errno;
	}
#endif /* CSC_CT_UTF8W */

	return ret_val;
}


/*
 * Try to interpret the input at *inbuf as an extended segment belonging to
 * `map` and, if it is one, convert the data it carries.
 *
 * The caller only gets here with **inbuf == ESC, so byte 0 is not
 * re-checked.  The header is laid out as:
 *
 *   bytes 1-3		must equal bytes 1-3 of map->desig;
 *   bytes 4-5		segment length, 7 significant bits per byte, high
 *			part first; 6 is added so that `len` counts the whole
 *			segment including this header;
 *   bytes 6 ...	must equal map->desig from offset 6 up to, but not
 *			including, its final byte (presumably the encoding
 *			name);
 *   final byte		0x02; tolerated when absent.
 *
 * map		Character-set table to try.
 * inbuf	In/out: input cursor.
 * inbytesleft	In/out: input bytes remaining.
 * outbuf	In/out: output cursor.
 * outbytesleft	In/out: output bytes remaining.
 *
 * Returns
 *   > 0	number of data bytes converted; cursors and counters updated;
 *    0		the input is not a segment of this map (nothing touched).
 *		NOTE(review): 0 is also returned, with the cursors advanced,
 *		when a matching header carries no data; the caller in this
 *		file treats 0 as "no match" -- worth revisiting;
 *   -1		error, errno set (EINVAL: inconsistent length or truncated
 *		data, E2BIG: output full, EBADF: code length not 1 or 2).
 *		Nothing is written back on error, so the caller still points
 *		at the ESC and can retry with more input or a larger output
 *		buffer; this matters because no state survives between calls.
 */
static int
extended_segment_conv(
	const csc_utf16_ct_map_t *	map,
	const unsigned char **		inbuf,
	size_t *			inbytesleft,
	unsigned char **		outbuf,
	size_t *			outbytesleft)
{
	size_t			ret_val;
	int			len;
	int			ret;
	const unsigned char *	ip;
	size_t			ileft;
	unsigned char *		op;
	size_t			oleft;
	const unsigned char *	desig;
	unsigned int		desig_length;
	unsigned int		high;
	unsigned int		low;
	unsigned short		uc;
#if defined(CSC_CT_UTF8)
	int			utf8_len;
#endif /* CSC_CT_UTF8 */
#if defined(CSC_CT_UTF8W)
	unsigned char		mb_buf[MB_BUF_LEN];
	unsigned char *		mb;
	size_t			mb_buf_len;
	unsigned char *		op1;
	size_t			oleft1;
	wchar_t			wc;
#endif /* CSC_CT_UTF8W */

	/* ERROR_BREAK() flips this to (size_t)(-1); tested after the loop. */
	ret_val = 0;

#if defined(CSC_CT_UTF8W)
	mb = mb_buf;
	mb_buf_len = (sizeof (mb_buf));
#endif /* CSC_CT_UTF8W */

	ip = *inbuf;
	ileft = *inbytesleft;

	/* Need at least the designation minus its optional final byte. */
	if (ileft < (map->desig_length - 1)) {
		return 0;
	}

	desig = map->desig;
	desig_length = map->desig_length;

	if ((*(desig + 1) != *(ip + 1)) ||
	    (*(desig + 2) != *(ip + 2)) ||
	    (*(desig + 3) != *(ip + 3))) {
		return 0;
	}

	/* Bytes 4 and 5 hold the length and are skipped by the comparison. */
	if (0 != memcmp(ip + 6, desig + 6, desig_length - 6 - 1)) {
		return 0;
	}

	len = (((*(ip + 4) & 0x007f) << 7) + (*(ip + 5) & 0x007f) + 6);

	/*
	 * Step over the header and turn `len` into the number of data bytes
	 * that follow it.
	 */
	if (ileft < desig_length) {
		/* Input ends right after the name: only valid if the
		 * segment itself claims to end there. */
		if ((desig_length - 1) != len) {
			errno = EINVAL;
			return -1;
		} else {
			ip += ileft;
			ileft = 0;
			len = 0;
		}
	} else if (ileft == desig_length) {
		if (0x02 == *(ip + ileft - 1)) {
			ip += ileft;
			ileft = 0;
			len = 0;
		} else {
			/* Last byte is data, not the 0x02 terminator. */
			ip += (ileft - 1);
			ileft = 1;
			len = 1;
		}
	} else {
		if (0x02 == *(ip + desig_length - 1)) {
			ip += desig_length;
			ileft -= desig_length;
			len -= desig_length;
		} else {
			ip += (desig_length - 1);
			ileft -= (desig_length - 1);
			len -= (desig_length - 1);
		}
	}

	op = *outbuf;
	oleft = *outbytesleft;

	ret = len;

	/*
	 * `len` is the count of data bytes still to convert.  It must only
	 * be changed by the loop step; per-character byte counts go into
	 * separate variables.
	 */
	for (; 0 < len; len -= map->in_code_length) {
		if ((len < map->in_code_length) ||
		    (ileft < map->in_code_length)) {
			ERROR_BREAK(EINVAL);
		}
#if defined(CSC_CT_UTF8W)
		/* Each character is stored as one whole wchar_t. */
		if (oleft < (sizeof (wchar_t))) {
			ERROR_BREAK(E2BIG);
		}
#else /* !CSC_CT_UTF8W */
		if (oleft < 2) {
			ERROR_BREAK(E2BIG);
		}
#endif /* !CSC_CT_UTF8W */

		/* Split the character into table row and column. */
		if (1 == map->in_code_length) {
			high = 0;
			low = *(ip++);
			--ileft;

		} else if (2 == map->in_code_length) {
			high = *(ip++);
			low = *(ip++);
			ileft -= 2;

		} else {
			ERROR_BREAK(EBADF);
		}

		/* A missing row maps to U+FFFD REPLACEMENT CHARACTER. */
		if (NULL == map->map[high]) {
			uc = 0xfffd;
		} else {
			uc = *((unsigned short *)
			       (map->map[high]) + low);
		}

#if defined(CSC_CT_UTF8)
#  if defined(CSC_CT_UTF8W)
		op1 = mb;
		oleft1 = mb_buf_len;
		UTF16_UTF8(uc, utf8_len, op1, oleft1);
		if (utf8_len != mblen((const char *)mb, utf8_len)) {
			/* Not representable in this locale: skip it. */
			continue;
		}
		/*
		 * Convert into an aligned local and copy it out; the cursor
		 * moves by sizeof (wchar_t), not by the multibyte length.
		 */
		if (0 < mbtowc(&wc, (const char *)mb, utf8_len)) {
			memcpy(op, &wc, (sizeof (wc)));
			op += (sizeof (wchar_t));
			oleft -= (sizeof (wchar_t));
		}
#  else /* !CSC_CT_UTF8W */
		UTF16_UTF8(uc, utf8_len, op, oleft);
#  endif /* !CSC_CT_UTF8W */
#else /* !CSC_CT_UTF8 */
		*(op++) = (uc >> 8);
		*(op++) = (uc & 0x00ff);
		oleft -= 2;
#endif /* !CSC_CT_UTF8 */
	}

	if ((size_t)(-1) == ret_val) {
		/* errno was set where the failure was detected. */
		return -1;
	}

	*inbuf = ip;
	*inbytesleft = ileft;
	*outbuf = op;
	*outbytesleft = oleft;

	return (ret - len);
}
