/*
 *	make_nftable.c
 *
 *	Unicode Normalization Form table generator
 */

#include "includes.h"

/* Program name used as the prefix of every diagnostic; set from argv[0] in main(). */
static char *prog_name = NULL;

/* Maximum number of 16-bit values gethex() accepts for one field
   (arrays are sized MAXNFSTR+1 to leave room for the terminating 0). */
#define	MAXNFSTR	31
/* Number of normalized columns parsed from the normalization file.
   One further column (index MAXNFSTYLE) is filled from the Mac file. */
#define	MAXNFSTYLE	4

/*
 * One table entry: the original 0-terminated UCS2 sequence and its
 * 0-terminated replacement for each of the MAXNFSTYLE+1 columns.
 */
typedef struct {
  uint16 orig[MAXNFSTR+1];
  uint16 normal[MAXNFSTYLE+1][MAXNFSTR+1];
} nftable_t;

/*
 * Write the command-line synopsis to stderr and terminate the
 * process with exit status 1. Never returns.
 *
 * progname - name shown in the synopsis (normally argv[0]).
 */

static void nftable_usage(char *progname)
{
  fprintf(stderr, "Usage is : %s <codepage> <normalizationfile> <macfile> <inputfile> <outputfile>\n", progname);

  exit(1);
}

/*
 * Copy one line out of a text buffer.
 *
 * buf      - in/out cursor into a NUL-terminated text buffer. On return
 *            it points just past the line terminator that was consumed,
 *            or at the terminating NUL if the text ended first.
 * line_buf - receives the line, always NUL-terminated.
 * size     - capacity of line_buf in bytes; characters that do not fit
 *            are dropped, but the whole source line is still consumed.
 *
 * A line ends at '\n', at Ctrl-Z ('\032') or at the end of the text.
 */

static void read_line( char **buf, char *line_buf, size_t size)
{
  char *cursor = *buf;
  size_t stored = 0;

  while (*cursor != '\0' && *cursor != '\n' && *cursor != '\032') {
    /* Keep one byte free for the terminator. */
    if (stored < (size - 1)) {
      line_buf[stored] = *cursor;
      stored++;
    }
    cursor++;
  }

  /* Step over the terminator, unless we stopped at the end of the text. */
  if (*cursor != '\0')
    cursor++;

  line_buf[stored] = '\0';
  *buf = cursor;
}

/*
 * Strip comments, blank lines and '@' lines from the data.
 *
 * buf  - in/out: address of a malloc'ed, NUL-terminated text buffer.
 *        The old buffer is freed and replaced by a new malloc'ed one
 *        in which every kept line is stored as its own NUL-terminated
 *        string, one directly after the other.
 * size - byte length of the input text; used to size the new buffer.
 *        It is not modified.
 *
 * For each input line everything from the first '#' onwards is
 * discarded and leading whitespace is skipped. Lines that are then
 * empty, or that start with '@', are dropped.
 *
 * Returns the number of lines copied. Exits the process with status 1
 * if the new buffer cannot be allocated.
 */

static size_t clean_data( char **buf, size_t *size)
{
  pstring linebuf;
  char *p = *buf;
  size_t num_lines = 0;
  char *newbuf = (char *)malloc( *size + 1);
  char *newbuf_p = NULL;

  if(newbuf == NULL) {
    fprintf(stderr, "%s: malloc fail for size %u.\n", prog_name, (unsigned int)(*size + 1));
    exit(1);
  }

  newbuf_p = newbuf;
  *newbuf_p = '\0';

  while( *p ) {
    char *cp;

    read_line( &p, linebuf, sizeof(linebuf));
    /* Null terminate after comment. */
    if((cp = strchr( linebuf, '#'))!= NULL)
      *cp = '\0';

    /* The <ctype.h> functions require a value representable as
       unsigned char; a plain (possibly signed) char holding a byte
       >= 0x80 would be undefined behaviour. */
    for(cp = linebuf; *cp && isspace((unsigned char)*cp); cp++)
      ;

    if(*cp == '\0' || *cp == '@')
      continue;

    /* Append the line, bounded by the space left in newbuf. */
    safe_strcpy(newbuf_p, cp, *size - (newbuf_p - newbuf));
    num_lines++;
    newbuf_p += (strlen(newbuf_p) + 1);
  }

  free(*buf);
  *buf = newbuf;
  return num_lines;
}

/*
 * Value of a single hexadecimal digit character, or -1 if the
 * character is not a hexadecimal digit.
 */

static int nf_hex_digit_value(int ch)
{
  if (ch >= '0' && ch <= '9') return(ch - '0');
  if (ch >= 'A' && ch <= 'F') return(ch - 'A' + 10);
  if (ch >= 'a' && ch <= 'f') return(ch - 'a' + 10);
  return(-1);
}

/*
 * Parse one ';'-terminated field of a normalization form table file.
 *
 * A field is a list of 4-digit hexadecimal values separated by single
 * spaces, e.g. "0044 0307;".
 *
 * str - receives the parsed values followed by a terminating 0;
 *       may be NULL to parse without storing.
 * p   - in/out cursor into the line; advanced past every character
 *       that was examined.
 *
 * Returns the number of values parsed, 0 if a value has more than four
 * hexadecimal digits (does not fit in 16 bits; the caller is expected
 * to skip the entry), or -1 on a syntax error or if the field holds
 * more than MAXNFSTR values without reaching ';'.
 */

static int gethex(uint16 str[], char **p)
{
  int count;

  for (count = 0; count < MAXNFSTR; count++) {
    uint16 value = 0;
    int digit, sep;

    /* Exactly four hex digits; the end of the string also fails here. */
    for (digit = 0; digit < 4; digit++) {
      int nibble = nf_hex_digit_value(*((*p)++));

      if (nibble < 0) return(-1);
      value = value * 16 + nibble;
    }
    if (str != NULL) str[count] = value;

    sep = *((*p)++);
    if (nf_hex_digit_value(sep) >= 0) {
      /* ignore over 16bits */
      return(0);
    }
    if (sep == ';') {
      /* End of field: terminate the sequence and report its length. */
      if (str != NULL) str[count + 1] = 0;
      return(count + 1);
    }
    if (sep != ' ') return(-1);
  }

  /* MAXNFSTR values were read without reaching the ';'. */
  return(-1);
}

/*
 * qsort() comparison callback: order two table entries by their
 * 0-terminated 'orig' UCS2 sequences.
 *
 * Returns 0 when both sequences are identical; otherwise the
 * difference between the first pair of 16-bit units that differ
 * (negative if p1 sorts first, positive if p2 sorts first).
 */

static int compare_ucs2(nftable_t *p1, nftable_t *p2)
{
  uint16 *lhs = p1 -> orig;
  uint16 *rhs = p2 -> orig;

  while (*lhs == *rhs) {
    /* Both sequences ended at the same position: equal. */
    if (*lhs == 0)
      return(0);
    lhs++;
    rhs++;
  }
  return(*lhs - *rhs);
}

/*
 * Parse a uint16 from a codepage file.
 *
 * buf - text of the number; the base is auto-detected by strtol()
 *       (decimal, 0x-prefixed hexadecimal or 0-prefixed octal).
 * uip - receives the value on success; left untouched on failure.
 *
 * Returns True on success, False if no digits could be parsed or the
 * value lies outside 0..65535.
 */

static BOOL parse_uint16(char *buf, uint16 *uip)
{
  long val;
  char *endptr = NULL;

  /* Range-check the long itself. Narrowing to unsigned int first would
     let out-of-range values (e.g. 4294967296 or a negative number
     where long is 64 bits wide) wrap around into 0..65535 and be
     accepted. Overflow inside strtol() yields LONG_MAX/LONG_MIN,
     which this check rejects as well. */
  val = strtol(buf, &endptr, 0);
  if(endptr == buf || val < 0 || val > 65535)
    return False;

  *uip = (uint16)val;
  return True;
}

/*
 * Report a parse failure on stderr and terminate the process with
 * exit status 1. Never returns.
 *
 * buf        - the line that could not be parsed.
 * input_file - name of the file the line came from.
 * msg        - short description of the problem.
 */

static void parse_error(const char *buf, const char *input_file, const char *msg)
{
  fprintf(stderr,
          "%s: In file %s : %s whilst parsing line \n%s\n",
          prog_name,
          input_file,
          msg,
          buf);

  exit(1);
}

/*
 * Create a compiled normalization form table file
 * from a normalization form definition file.
 */

static int do_compile(const char *codepage, const char *normalization_file, const char *macosx_file, const char *input_file, const char *output_file)
{
  FILE *fp = NULL;
  size_t size = 0;
  size_t offset = 0;
  char *buf = NULL;
  char *buftop = NULL;
  char *output_buf = NULL;
  nftable_t *nf_table = NULL;
  char ucs2flag[65536];
  int num_lines = 0;
  int num_tables = 0;
  int i, j, n, max_normal[MAXNFSTYLE+1];
  SMB_STRUCT_STAT st;

  /* Get the size of the input file. Read the entire thing into memory. */
  if(sys_stat((char *)input_file, &st)!= 0) {
    fprintf(stderr, "%s: failed to get the file size for file %s. Error was %s\n",
            prog_name, input_file, strerror(errno));
    exit(1);
  }

  size = (size_t)st.st_size;

  if((fp = sys_fopen(input_file, "r")) == NULL) {
    fprintf(stderr, "%s: cannot open file %s for input.\n", prog_name, input_file);
    exit(1);
  }

  /* As we will be reading text, allocate one more byte for a '\0' */
  if((buf = (char *)malloc( size + 1 )) == NULL) {
    fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name, size + 1);
    fclose(fp);
    exit(1);
  }

  if(fread( buf, 1, size, fp) != size) {
    fprintf(stderr, "%s: read failed for file %s. Error was %s.\n", prog_name,
            input_file, strerror(errno));
    free((char *)buf);
    fclose(fp);
    exit(1);
  }

  /* Null terminate the text read. */
  buf[size] = '\0';
  fclose(fp);

  /* Go through the data line by line, strip out comments (anything
     after a '#' to end-of-line) and blank lines. The rest should be
     the codepage data.
   */

  num_lines = clean_data( &buf, &size);
  buftop = buf;

  /*
   * Initialize the codepage data.
   */

  memset(ucs2flag, '\0', sizeof(ucs2flag));

  /* Now pick up the valid UCS2 code in this codepage. */

  for(i = 0; i < num_lines; i++) {
    char token_buf[512];
    char *p = buf;
    uint16 ucs2 = 0;

    /* Get the codepage value. */
    if(!next_token(&p, token_buf, NULL, sizeof(token_buf)))
      parse_error(buf, input_file, "cannot parse first value");

    if(!parse_uint16( token_buf, &ucs2))
      parse_error(buf, input_file, "first value doesn't resolve to an unsigned 16 bit integer");

    /* Get the ucs2 value. */

    if(!next_token(&p, token_buf, NULL, sizeof(token_buf))) {

      /*
       * Some of the multibyte codepage to unicode map files
       * list a single byte as a leading multibyte and have no
       * second value.
       */

      buf += (strlen(buf) + 1);
      continue;
    }

    if(!parse_uint16( token_buf, &ucs2))
      parse_error(buf, input_file, "second value doesn't resolve to an unsigned 16 bit integer");

    ucs2flag[ucs2] = 1;

    /*
     * Next line.
     */
    buf += (strlen(buf) + 1);
  }
  free((char *)buftop);


  /* Get the size of the normalization file. Read the entire thing into memory. */
  if(sys_stat((char *)normalization_file, &st)!= 0) {
    fprintf(stderr, "%s: failed to get the file size for file %s. Error was %s\n",
            prog_name, normalization_file, strerror(errno));
    exit(1);
  }

  size = (size_t)st.st_size;

  if((fp = fopen(normalization_file, "r")) == NULL) {
    fprintf(stderr, "%s: cannot open file %s for input.\n", prog_name, normalization_file);
    exit(1);
  }

  /* As we will be reading text, allocate one more byte for a '\0' */
  if((buf = (char *)malloc( size + 1 )) == NULL) {
    fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name, size + 1);
    fclose(fp);
    exit(1);
  }

  if(fread( buf, 1, size, fp) != size) {
    fprintf(stderr, "%s: read failed for file %s. Error was %s.\n", prog_name,
            normalization_file, strerror(errno));
    free((char *)buf);
    fclose(fp);
    exit(1);
  }

  /* Null terminate the text read. */
  buf[size] = '\0';
  fclose(fp);

  /* Go through the data line by line, strip out comments (anything
     after a '#' to end-of-line) and blank lines. The rest should be
     the normalization form data.
   */

  num_lines = clean_data( &buf, &size);
  buftop = buf;

  /*
   * Initialize the output data.
   */

  if((nf_table = (nftable_t *)malloc(num_lines * sizeof(nftable_t))) == NULL) {
    fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name,
           num_lines * sizeof(nftable_t));
    free((char *)buf);
    exit(1);
  }

  num_tables = 0;
  for (n = 0; n < MAXNFSTYLE; n++) max_normal[n] = 0;
  for (i = 0; i < num_lines; i++) {
    char *p = buf;
    int max;

    for (j = 0; j < MAXNFSTR+1; j++) nf_table[num_tables].orig[j] = 0;
    if((max = gethex(nf_table[num_tables].orig, &p)) < 0)
      parse_error(buf, normalization_file, "unexpected token");
    if (max != 1 || ucs2flag[nf_table[num_tables].orig[0]] == 0) max = 0;
    for (n = 0; n < MAXNFSTYLE; n++) {
      if (!max) break;
      for (j = 0; j < MAXNFSTR+1; j++) nf_table[num_tables].normal[n][j] = 0;
      if ((max = gethex(&(nf_table[num_tables].normal[n][0]), &p)) < 0)
	parse_error(buf, normalization_file, "unexpected token");
      if (max > max_normal[n]) max_normal[n] = max;
    }

    if(max) num_tables++;

    /*
     * Next line.
     */
    buf += (strlen(buf) + 1);
  }
  free((char *)buftop);


  /* Get the size of the normalization Mac file. Read the entire thing into memory. */
  if(sys_stat((char *)macosx_file, &st)!= 0) {
    fprintf(stderr, "%s: failed to get the file size for file %s. Error was %s\n",
            prog_name, macosx_file, strerror(errno));
    exit(1);
  }

  size = (size_t)st.st_size;

  if((fp = fopen(macosx_file, "r")) == NULL) {
    fprintf(stderr, "%s: cannot open file %s for input.\n", prog_name, macosx_file);
    exit(1);
  }

  /* As we will be reading text, allocate one more byte for a '\0' */
  if((buf = (char *)malloc( size + 1 )) == NULL) {
    fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name, size + 1);
    fclose(fp);
    exit(1);
  }

  if(fread( buf, 1, size, fp) != size) {
    fprintf(stderr, "%s: read failed for file %s. Error was %s.\n", prog_name,
            macosx_file, strerror(errno));
    free((char *)buf);
    fclose(fp);
    exit(1);
  }

  /* Null terminate the text read. */
  buf[size] = '\0';
  fclose(fp);

  /* Go through the data line by line, strip out comments (anything
     after a '#' to end-of-line) and blank lines. The rest should be
     the normalization form data.
   */

  num_lines = clean_data( &buf, &size);
  buftop = buf;

  /*
   * Initialize the output data.
   */

  if((nf_table = (nftable_t *)realloc(nf_table, (num_tables + num_lines) * sizeof(nftable_t))) == NULL) {
    fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name,
	   (num_tables + num_lines) * sizeof(nftable_t));
    free((char *)buf);
    exit(1);
  }

  max_normal[MAXNFSTYLE] = 0;
  for (i = 0; i < num_tables; i++) {
    for (j = 0; j < MAXNFSTR+1; j++)
      nf_table[i].normal[MAXNFSTYLE][j] = nf_table[i].orig[j];
  }
  for (i = 0; i < num_lines; i++) {
    char *p = buf;
    int max;

    for (j = 0; j < MAXNFSTR+1; j++) nf_table[num_tables].orig[j] = 0;
    if((max = gethex(nf_table[num_tables].orig, &p)) < 0)
      parse_error(buf, macosx_file, "unexpected token");
    if (max != 1 || ucs2flag[nf_table[num_tables].orig[0]] == 0) max = 0;
    for (n = MAXNFSTYLE; n <= MAXNFSTYLE; n++) {
      if (!max) break;
      for (j = 0; j < MAXNFSTR+1; j++) nf_table[num_tables].normal[n][j] = 0;
      if ((max = gethex(&(nf_table[num_tables].normal[n][0]), &p)) < 0)
	parse_error(buf, macosx_file, "unexpected token");
      if (max > max_normal[n]) max_normal[n] = max;
    }

    if (max) {
      for (n = 0; n < num_tables; n++) {
	for (j = 0; nf_table[num_tables].orig[j]; j++)
	  if (nf_table[num_tables].orig[j] != nf_table[n].orig[j]) break;
	if (!nf_table[num_tables].orig[j]) {
	  for (j = 0; j < MAXNFSTR+1; j++)
	    nf_table[n].normal[MAXNFSTYLE][j] = nf_table[num_tables].normal[MAXNFSTYLE][j];
	  break;
	}
      }
      if (n >= num_tables) {
	for (n = 0; n < MAXNFSTYLE; n++) {
	  for (j = 0; j < MAXNFSTR+1; j++)
	    nf_table[num_tables].normal[n][j] = nf_table[num_tables].orig[j];
	}
	num_tables++;
      }
    }
    /*
     * Next line.
     */
    buf += (strlen(buf) + 1);
  }
  free((char *)buftop);


  qsort(nf_table, num_tables, sizeof(nftable_t), QSORT_CAST compare_ucs2);
  for (i = 0; i < num_tables - 1; i++) {
    if (nf_table[i].orig[0] != nf_table[i+1].orig[0]) continue;
    num_tables--;
    for (j = i + 1; j < num_tables; j++) {
      memcpy(&nf_table[j], &nf_table[j+1], sizeof(nftable_t));
    }
  }

  size = NFTABLE_HEADER_SIZE + sizeof(uint16) * num_tables;
  for (n = 0; n < MAXNFSTYLE+1; n++)
    size += (max_normal[n]+1) * sizeof(uint16) * num_tables;

  if((output_buf = (char *)malloc( size )) == NULL) {
    fprintf(stderr, "%s: output buffer malloc fail for size %d.\n", prog_name, size);
    exit(1);
  }

  /* Setup the output file header. */
  SSVAL(output_buf,NFTABLE_VERSION_OFFSET,NFTABLE_FILE_VERSION_ID);
  memset(&output_buf[NFTABLE_CLIENT_CODEPAGE_OFFSET],'\0',NFTABLE_CODEPAGE_ID_SIZE);
  safe_strcpy(&output_buf[NFTABLE_CLIENT_CODEPAGE_OFFSET], codepage, NFTABLE_CODEPAGE_ID_SIZE - 1);
  output_buf[NFTABLE_CLIENT_CODEPAGE_OFFSET+NFTABLE_CODEPAGE_ID_SIZE-1] = '\0';
  SIVAL(output_buf,NFTABLE_SIZE_OFFSET,num_tables);
  for (n = 0; n < MAXNFSTYLE+1; n++)
    SSVAL(output_buf,NFTABLE_NORMAL_LENGTH_OFFSET+n*2,max_normal[n]+1);

  offset = NFTABLE_HEADER_SIZE;

  for (i = 0; i < num_tables; i++) {
    SSVAL(output_buf, offset, nf_table[i].orig[0]);
    offset += sizeof(uint16);
  }
  for (n = 0; n < MAXNFSTYLE+1; n++) {
    for (i = 0; i < num_tables; i++) {
      for (j = 0; j <= max_normal[n]; j++) {
	SSVAL(output_buf, offset, nf_table[i].normal[n][j]);
	offset += sizeof(uint16);
      }
    }
  }

  /* Now write out the output_buf. */
  if((fp = sys_fopen(output_file, "w"))==NULL) {
    fprintf(stderr, "%s: Cannot open output file %s. Error was %s.\n",
            prog_name, output_file, strerror(errno));
    exit(1);
  }

  if(fwrite(output_buf, 1, size, fp) != size) {
    fprintf(stderr, "%s: Cannot write output file %s. Error was %s.\n",
            prog_name, output_file, strerror(errno));
    exit(1);
  }

  fclose(fp);

  return 0;
}

/*
 * Program entry point.
 *
 * Expects exactly five arguments, in this order:
 *   <codepage> <normalizationfile> <macfile> <inputfile> <outputfile>
 * Any other argument count prints the usage message and exits.
 * Returns the result of do_compile().
 */
int main(int argc, char **argv)
{
  prog_name = argv[0];

  if (argc != 6)
    nftable_usage(prog_name);

  /* argv[1] codepage, argv[2] normalization file, argv[3] Mac file,
     argv[4] codepage map (input file), argv[5] output file. */
  return do_compile(argv[1], argv[2], argv[3], argv[4], argv[5]);
}
