/* -*-C-*-
********************************************************************************
*
* File:         htmlparseI.c
* RCS:          $Id: $
* Description:	Interfaces to the HTML Parser library. Needed to Separate
*		these from htmlparse.c due to some naming conflicts between
*		HTML parser lib and Xlisp lib.
* Author:       Niels Mayer
* Created:      Sat Jul 16 00:00:00 1995
* Modified:     Sun Nov 23 21:38:51 1997 (Niels Mayer) npm@mayer.netcom.com
* Language:     C
* Package:      N/A
* Status:       Experimental, do not distribute.
*
* Copyright (C) 1994-1996 Enterprise Integration Technologies Corp. and Niels Mayer.
* WINTERP 1.15-1.99, Copyright (c) 1993, Niels P. Mayer.
* WINTERP 1.0-1.14, Copyright (c) 1989-1992 Hewlett-Packard Co. and Niels Mayer.
* 
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Enterprise Integration Technologies,
* Hewlett-Packard Company, or Niels Mayer not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Enterprise Integration Technologies, Hewlett-Packard
* Company, and Niels Mayer makes no representations about the suitability of
* this software for any purpose.  It is provided "as is" without express or
* implied warranty.
* 
* ENTERPRISE INTEGRATION TECHNOLOGIES, HEWLETT-PACKARD COMPANY AND NIELS MAYER
* DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL ENTERPRISE
* INTEGRATION TECHNOLOGIES, HEWLETT-PACKARD COMPANY OR NIELS MAYER BE LIABLE
* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
* RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
* CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
********************************************************************************
*/
/* Revision-control identification string compiled into this file; it is not
 * referenced by any code visible in this file. */
static char rcs_identity[] = "@(#)$Id: $";

#include <stdio.h>
#include <errno.h>
extern char *sys_errlist[];	/* part of <errno.h>, but not defined by it. */
extern int sys_nerr;		/* part of <errno.h>, but not defined by it. */
extern int errno;		/* some but not all systems require this */

#include "../../../jays-parser/dtd/libdtd/atom.h"
#include "../../../jays-parser/dtd/libdtd/dtd.h"
#include "../../../jays-parser/html/html.h"
#include "../../../jays-parser/html/htmlloc.h" /* for 'current', '_inittree()' etc. */

/*
 * Stream that Htp_HTML_Error_Hook() writes parser diagnostics to.
 * Htp_Parse_Tree_From_HTML_File() points it at the requested error-log file
 * (or at stderr when that file cannot be opened) and resets it to NULL after
 * a completed parse.
 */
static FILE* parse_err_fp = NULL; /* init'd in Htp_Parse_Tree_From_HTML_File */

/*
 * Error callback handed to the HTML parser through HTMLSetErrorHand().
 * Formats a single parser diagnostic and writes it to the current error log.
 *
 * errp -- the diagnostic to report; its severity, lineno, text, type and
 *         message fields are printed.
 *
 * Output goes to parse_err_fp, or to stderr when no error log is currently
 * open (parse_err_fp is NULL outside of a parse), so a stray callback can
 * never write through a NULL stream.
 *
 * Always returns 1.
 */
static int
Htp_HTML_Error_Hook(struct error *errp)
{
  const char *errstr;
  FILE *out;

  switch (errp->severity) {
  case E_INFORMATION:
    errstr = "HTML INFORMATION";
    break;
  case E_WARNING:
    errstr = "HTML WARNING";
    break;
  case E_SEVERE:
    errstr = "HTML ERROR";
    break;
  case E_CRASH:
    errstr = "HTML CRASH";
    break;
  default:
    /* Unrecognized severity: still report it, but never print an
       uninitialized pointer. */
    errstr = "HTML UNKNOWN SEVERITY";
    break;
  }

  out = (parse_err_fp != NULL) ? parse_err_fp : stderr;

  fprintf(out,
          "%s: %d: '%s' type=%d\n\t'%s'\n",
          errstr,
          errp->lineno,
          errp->text,
          errp->type,
          errp->message);

  return (1);
}

/*
 * The parser returns a very strict parse tree.
 * This function fixes up some allowable problems.
 * Currently, it just fixes the case where an anchor such as <A NAME="foo">
 * is not correctly terminated with a </A>.  Concretely: every <A> tag that
 * has no HREF attribute and has children gets those children moved up to
 * become its immediate following siblings; the anchor's previous siblings
 * are re-attached after the last moved child.
 *
 * bp -- root of the (sub)tree to fix up in place.  NULL is accepted and
 *       treated as an empty tree.
 *
 * Children are visited recursively, siblings iteratively, so stack depth is
 * bounded by the nesting depth of the tree rather than by the length of a
 * sibling chain (which this fix-up itself makes longer).
 */
static void
Htp_Parse_Tree_Fixup_Named_Anchors(Block *bp)
{
  Block *oldsib, *child, *tail;

  for (; bp != NULL; bp = HTMLGetBSib(bp)) {
    if (HTMLGetBType(bp) == TAG
        && HTMLGetBAtom(bp) == GetAtom("A")
        && HTMLGetAttr(bp, "HREF") == 0) {
      child = HTMLGetBChild(bp);
      if (child) {
        /* Splice the child chain in between bp and its old sibling chain. */
        oldsib = HTMLGetBSib(bp);
        HTMLSetBSib(bp, child);
        HTMLSetBChild(bp, 0);
        if (oldsib) {
          tail = child;
          while (HTMLGetBSib(tail))
            tail = HTMLGetBSib(tail);
          HTMLSetBSib(tail, oldsib);
        }
      }
    }

    /* A fixed-up anchor has no child any more; its former children are
       reached through the sibling chain on the following iterations. */
    if (HTMLGetBChild(bp))
      Htp_Parse_Tree_Fixup_Named_Anchors(HTMLGetBChild(bp));
  }
}

/******************************************************************************
 * remember to call HTMLFreeParseTree(bp) to free the "Block*" data returned
 * by this proc.
 ******************************************************************************/
char err_buf[BUFSIZ];
Block *
Htp_Parse_Tree_From_HTML_File(char* parse_file, char* err_filepath)
{
  Block *bp;
  extern FILE *yyin;

  (void) strcpy(err_buf, "");	/* clear error buffer */

  if (parse_err_fp != NULL)	/* just in case an error occurred on previous pass */
    fclose(parse_err_fp);	/* close/discared the previous err log */

  parse_err_fp = fopen(err_filepath, "w");
  if (parse_err_fp == NULL) {
    parse_err_fp = stderr;
  }

  yyin = fopen(parse_file, "r");
  if (yyin == NULL) {
    if (errno < sys_nerr)
      (void) sprintf(err_buf, "Error opening HTML file: %s", sys_errlist[errno]);
    else
      (void) strcpy(err_buf, "Error opening HTML file: unknown error.");
    return (NULL);
  }

  HTMLSetErrorHand(Htp_HTML_Error_Hook);

  /* HTMLParseBegin(); */ /* ---> call this only once, currently called from WWWeasel.c:winterp_embedded_init_xlisp_objs */
  current = _initTree();	/* this replaces the chief need for HTMLParseBegin(), requires include of html/htmlloc.h (which has some name conflcts w/ XLISP, thus this separate file for these few functions */
  HTMLParseDo(yyin);
  bp = HTMLParseEnd();		/* this calls free(current). */

  Htp_Parse_Tree_Fixup_Named_Anchors(bp);

  fclose(parse_err_fp);
  parse_err_fp = NULL;

  return (bp);
}
