/*////////////////////////////////////////////////////////////////////////
Copyright (c) 1994-1999 Yutaka Sato
Copyright (c) 1994-1999 Electrotechnical Laboratry (ETL), AIST, MITI

Permission to use, copy, and distribute this material for any purpose
and without fee is hereby granted, provided that the above copyright
notice and this permission notice appear in all copies.
ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS
MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS
OR IMPLIED WARRANTIES.
/////////////////////////////////////////////////////////////////////////
Content-Type:	program/C; charset=US-ASCII
Program:	url.c (rewrite for relayed-URL in the HTML)
Author:		Yutaka Sato <ysato@etl.go.jp>
Description:

    REWRITING RULE

      Uniform rewriting rule for URLs to be gatewayed by HTTP is:

	N://H:P/F  <=> http://${delegate}/-_-N://H:P/F

      Special rewriting rule for Gopher URL to be gatewayed by Gopher is:

	G://H:P/gF <=> G://${delegate}/g-_-G://H:P/gF

History:
	March94	created
	941224	changed the rewriting rule
//////////////////////////////////////////////////////////////////////#*/
#include "delegate.h"
#include "url.h"

/* When nonzero, CTX_url_derefer() also tries URLs that carry ODGU_MARK. */
int ENABLE_ODGU = 0;

#define TAGTRACE 0

#include <ctype.h>
#include <string.h>
extern char *Sprintf();
extern char *getv();
extern char *CTX_mount_url_fromL();
extern char *CTX_get_modifires();
extern char *scan_URI_scheme();

int GOPHER_ON_HTTP = 1;

extern int URICONV_ANY;
extern int URICONV_FULL;
extern int URICONV_MOUNT;
extern int URICONV_NORMAL;
extern int URICONV_PARTIAL;
extern int TAGCONV_KILL;
extern int TAGCONV_APPLET;
extern int TAGCONV_JAVA;
extern unsigned char TAGCONV_TAGEND[];

/*
 *	SEARCH URL REFERENCE IN HTML
 *	(half done and dangerous X-<)
 */

/*
 * Return 1 when "tag" points at the opening of a <BASE ...> element,
 * that is the case-insensitive text "<BASE" followed by white space.
 * Return 0 otherwise, including when "tag" is NULL.
 */
static int isBASE(tag)
	char *tag;
{
	if( tag == NULL )
		return 0;
	if( strncasecmp(tag,"<BASE",5) != 0 )
		return 0;
	/* <ctype.h> classifiers need a value representable as unsigned char */
	return isspace((unsigned char)tag[5]) ? 1 : 0;
}

/*
 * Find the next attribute (or header field) in "html" whose value is to
 * be converted, as judged by HTML_attrTobeConv().
 *
 *   html      text to be scanned; it may be cut (a NUL stored) when a
 *             truncated fragment is pushed to "rem"
 *   ctype     content type; unless it begins with 't' or "text/" the
 *             text is first examined as a header line
 *             (WWW-Authenticate:/Proxy-Authenticate: with "Realm=<",
 *             Location:, URI:)
 *   rem       when not NULL, receives a tag or attribute fragment that
 *             is truncated at the end of "html" (only when the fragment
 *             is shorter than 1024 bytes); no other bound is checked here
 *   tagp      when not NULL, set to the '<' of the tag seen last, or NULL
 *   attrp     when not NULL, set to the start of the attribute found
 *   convmaskp when not NULL, the mask on entry is saved and stored back
 *             before each call of HTML_attrTobeConv(), which receives
 *             the pointer
 *
 * Returns a pointer to the beginning of the value to be converted (after
 * '=' and an opening quote if any), or NULL when nothing is found or
 * when the fragment has been pushed to "rem".
 */
char *html_nextTagAttr(html,ctype,rem,tagp,attrp,convmaskp)
	char *html,*ctype,*rem,**tagp,**attrp;
	int *convmaskp;
{	char *str,*ref,*top,*tag,*attr;
	unsigned char ch,*p;
	char *strcasestr();
	int len;
	int convmask;
	int cvmb;
	int isendtag;
	unsigned char *atp;
	char fname[32];
	char quotech,*attrtailp;

	top = NULL;
	tag = NULL;
	attr = NULL;
	str = html;
	quotech = 0;
	/* convmask is only read back where convmaskp is not NULL */
	if( convmaskp ) convmask = *convmaskp;

	if( *ctype != 't' && strncasecmp(ctype,"text/",5) != 0 ){
	/* not in message body */
		if( strncasecmp(str,"WWW-Authenticate:",17) == 0
		 || strncasecmp(str,"Proxy-Authenticate:",19) == 0 )
		if( ref = strcasestr(str,"Realm=<") ){
			/* if attrTobeConv() */ {
				attr = str;
				ref += 7;
				top = ref;
				goto exit;
			}
		}
		if( strncasecmp(str,"Location:",9) == 0 
		 || strncasecmp(str,"URI:",4) == 0 ){
			/* the ':' is known to exist from the match above */
			ref = strchr(str,':') + 1;
			while( *ref == ' ' )
				ref++;
			if( convmaskp ) *convmaskp = convmask;
			wordscanY(str,fname,sizeof(fname),"^:");
			if( HTML_attrTobeConv(fname,"Header",convmaskp) ){
				attr = str;
				top = ref;
				goto exit;
			}
		}
	}

	/*
	 * The following code seems to make redundant search for attribute
	 * even when not in a TAG ...
	 * Maybe it is to cope with not only TAG but also HTTP header ... 
	 * or because multiple attributes are in a TAG but "tagp" is not
	 * restored ? or... most likely,
	 * just because it did not care TAG when it is created originally...
	 */
	atp = NULL;
	for(;;){
		isendtag = 0;
		if( atp == TAGCONV_TAGEND )
			tag = NULL;

/*
Probably this is obsolete, introduced in 2.8.33 where attribute was
naively searched after any white space, and at the top of each line.
After tag symbols has become to be cared, in 6.1.20, such line
beginning with a tag seems to be excluded.
		if( str == html && *str != '<' && *str != '>' )
*/
		if( 0 )
			ref = html;
		else{
			/* advance "ref" to a candidate position inside a tag */
			for( ref = str; ch = *ref; ref++ ){
				if( ch == '<' )
				{
					p = ref + 1;
					if( *p == '/' ) p++;
					if( *p != 0 && !isalpha(*p) )
						continue; /* not a tag */
					for(; *p; p++ )
						if( !isalpha(*p) )
							break;
					if( *p != 0 && !isspace(*p) )
						continue; /* not a tag */

					tag = ref;
					isendtag = ref[1] == '/';
				}
				else
				if( ch == '>' )
				{
					if( tag != NULL ){
/*
						if( isendtag )
*/
							break;

					/* can be bad for begin tags with
					 * multiple attributes to be rewriten
					 * with "tagp" info. which is not
					 * availabe for secondary or after
					 * attr. in the current implementation.
					 * It must be fixed to make multiple
					 * attributes rewriting.
					(6.1.20)
					(7.6.1) this comment (maybe) about
					"isendtag" seems misunderstanding
					thinking the tag is interpreted right
					succeeding SPACE char.)...?
					Interpreting a tag after closing ">" char.
					seems not to affect any attribute
					rewriting.
					 */
					}
					tag = NULL;
					isendtag = 0;
				}

				/*
				this should be so, but can be bad for
				TAG-independent attribute rewriting?
				 */
				if( tag == NULL )
					continue;

				if(  isspace(ch) )
					break;
				if( ch == '(' ){
					/* can be a JavaScript function call */
					ref++;
					break;
				}
				if( ch == ';' || ch == '"' || ch == '\'' ){
					ref++;
					break;
				}
			}
		}
			/* skip white space before the candidate */
			for(; ch = *ref; ref++ )
				if( !isspace(ch) )
					break;

		/* the text ended inside a tag: check if the tag is cut */
		if( rem != NULL && tag != NULL && *ref == 0 ){
			for( p = tag+1; *p; p++ )
				if( !isalpha(*p) )
					break;
			if( *p == 0 ){
				sv1log("##truncated tag-name:%s\n",tag);
				goto push;
			}
			while( isspace(*p) )
				p++;
			if( *p == 0 ){
				sv1log("##truncated tag-body:%s\n",tag);
				goto push;
			}
		}
		if( *ref == 0 )
			break;

		/* restart the scan from the next '<' */
		if( *ref == '<' && str < ref ){
			str = ref;
			continue;
		}

		if( *ref == '<' )
			tag = ref;

		attr = ref;

		/* a closing '>' is presented as the TAGCONV_TAGEND marker */
		if( *attr == '>' )
			atp = TAGCONV_TAGEND;
		else	atp = attr;

		if( TAGTRACE ){
			char t[9],a[13];
			wordScan(atp,a);
			wordScan(tag?tag:"",t);
			sv1log("## TAG=%8X[%-8s] ATTR=[%-12s]\n",tag,t,a);
		}
		attrtailp = 0;
		/* find the end of this "name[=value]" to detect truncation */
		if( rem != NULL && tag != NULL && isalpha(*atp) ){
			for( p = atp+1; *p; p++ )
				if( !isalpha(*p) )
					break;
			if( *p == 0 ){
				sv1log("##truncated attr-name:%s\n",atp);
				goto push;
			}
			while( isspace(*p) )
				p++;
			if( *p == '=' ){
				p++;
				while( isspace(*p) )
					p++;
				quotech = 0;
				if( *p == '"' || *p == '\'' ){
					quotech = *p;
					p++;
				}
				for(; ch = *p; p++ ){
					if( quotech != 0 ){
						if( ch == quotech )
							break;
					}else{
						if( ch == '>' || isspace(ch) )
							break;
					}
				}
			}
			if( *p == 0 ){
				sv1log("##truncated attr-value:%s\n",atp);
				goto push;
			}
			if( *p == quotech )
				p++;
			while( isspace(*p) )
				p++;
			attrtailp = p;
		}

		if( convmaskp ) *convmaskp = convmask;
		/* nonzero "len" is used below as the length of the name */
		len = HTML_attrTobeConv(atp,tag,convmaskp);
		if( len == 0 ){
			str = ref + 1;
			if( (p = attrtailp) && *p == 0 ){
				sv1log("##truncated tag-body:%s\n",str);
				goto push;
			}
			continue;
		}
		if( atp == TAGCONV_TAGEND ){
			top = ref;
			goto exit;
		}
		p = ref + len;

		while( isspace(*p) )
			p++;

		switch( *p ){
			case 0:   goto push;
			case '=': p++; break;
			default:  str = ref + 1; continue;
		}

		while( isspace(*p) )
			p++;

		if( *p == '"' || *p == '\'' )
		{
			quotech = *p;
			p++;
		}

push:
		/* "p" is the start of the value, or where truncation was found */
		attrtailp = p;
		if( rem != NULL ){
			for( attrtailp = p; ch = *attrtailp; attrtailp++ ){
				if( quotech != 0 && ch == quotech
				 || quotech == 0 && (isspace(ch) || ch == '>')
				){
					break;
				}
			}
		}

		if( rem != NULL && *attrtailp == 0 ){
			/* pushing a tag fragment from its begining is
			 * required in recent implementation ...
			 */
			if( tag && strlen(tag) < 1024 ){
				strcpy(rem,tag);
				*tag = 0;
			}else
			if( strlen(ref) < 1024 ){
			strcpy(rem,ref);
			*ref = 0;
			}else{
sv1log("#### TOO LONG TO PUSH (%d): %s\n",strlen(ref),ref);
			}
			top = NULL;
			goto exit;
		}
		top = p;
		goto exit;
	}
exit:
	if( tagp != NULL )
		*tagp = tag;
	if( attrp != NULL )
		*attrp = attr;
	return top;
}

/*
 *	TRANSFORM delegated-URL to NORMAL URL:
 *	Delegation information embedded in the URL is removed and parsed.
 *	The "url" string passed from the caller will be overwritten.
 */
/*
 * Format DELEGATE_FLAGS as "=flags=" at "s".
 * The value returned is the one given back by Sprintf().
 */
static char *printFlags(Conn,s)
	Connection *Conn;
	char *s;
{	char *tail;

	tail = Sprintf(s,"=%s=",DELEGATE_FLAGS);
	return tail;
}
/* Characters taken as the end of the host:port part while scanning a URL. */
char *endofHOSTPORT = "/? \t\r\n";

/*
 * Split a leading "NDGU_MARK<d>...<d>" prefix, where <d> is '=' or '/',
 * off "proto".  On a match the prefix including both delimiters is
 * copied to "prefix" and removed from "proto" in place.  Otherwise
 * "prefix" is left empty and "proto" is not changed.
 * "prefix" must be large enough for the removed text (not checked here).
 * The returned value carries no information.
 */
int url_rmprefix(proto,prefix)
	char *proto,*prefix;
{	char *p;
	int len;
	char dch;

	prefix[0] = 0;
	if( strstr(proto,NDGU_MARK) != proto )
		return 0;

	p = proto + strlen(NDGU_MARK);
	dch = *p;
	if( dch != '=' && dch != '/' )
		return 0;

	for( p++; *p; p++ ){
		if( *p == dch ){
			len = p - proto + 1;
			strncpy(prefix,proto,len);
			prefix[len] = 0;
			/* the areas overlap, for which strcpy() is undefined */
			memmove(proto,p+1,strlen(p+1)+1);
			break;
		}
	}
	return 0;
}

/*
 * Return 1 when "url" begins (ignoring case) with one of
 * "ftp://", "file:", "data:", "builtin:" or "http://"; 0 otherwise.
 */
int isLoadableURL(url)
	char *url;
{	static char *heads[] = {
		"ftp://", "file:", "data:", "builtin:", "http://", NULL
	};
	int hi;

	for( hi = 0; heads[hi] != NULL; hi++ ){
		if( strncasecmp(url,heads[hi],strlen(heads[hi])) == 0 )
			return 1;
	}
	return 0;
}

/*
 * Guess whether "url" was sent by a client that uses this server as a
 * proxy.  Returns 1 for a URL beginning with http://, nntp://, wais://,
 * ftp:// or gopher://, and also for any other "scheme://..." which does
 * not contain NDGU_MARK.  Returns 0 otherwise.
 */
int fromProxyClient(url)
	char *url;
{	char *sp,proto[32];

	if( strncasecmp(url,"http://",  7) == 0
	 || strncasecmp(url,"nntp://",  7) == 0
	 || strncasecmp(url,"wais://",  7) == 0
	 || strncasecmp(url,"ftp://",   6) == 0
	 || strncasecmp(url,"gopher://",9) == 0 )
		return 1;

	if( url[0] == '/' )
		return 0;

	/* the size is received as int by the unprototyped callee, so a
	 * size_t must not be passed as is */
	sp = scan_URI_scheme(url,proto,(int)sizeof(proto));
	if( sp == NULL )
		return 0;
	if( strncmp(sp,"://",3) != 0 )
		return 0;
	if( strstr(url,NDGU_MARK) != NULL )
		return 0;
	return 1;
}
/* Return 1 when "url" contains ODGU_MARK or NDGU_MARK anywhere, else 0. */
int is_redirected_url(url)
	char *url;
{
	if( strstr(url,ODGU_MARK) != NULL || strstr(url,NDGU_MARK) != NULL )
		return 1;
	return 0;
}
/* Return 1 when "sel" begins with NDGU_MARK, else 0. */
int is_redirected_selector(sel)
	char *sel;
{	int differ;

	differ = strncmp(sel,NDGU_MARK,strlen(NDGU_MARK));
	return differ == 0 ? 1 : 0;
}

/*
 * Interpret a leading "+flags=", "-flags=" or "=flags=" at "np".
 *   '+' and '-' turn the named flags on and off through onoff_flags(),
 *   '=' replaces "flags" through wordscanX().
 * The text following the closing '=' is then moved to "tp" and "tp" is
 * returned.  When "np" carries no such flag part, nothing is changed and
 * "np" is returned.
 */
static char *scan_flags(np,tp,flags)
	char *np,*tp,*flags;
{	char *fp;

	if( *np != '+' && *np != '-' && *np != '=' )
		return np;
	if( (fp = (char*)strchr(np+1,'=')) == NULL )
		return np;

	*fp = 0;
	switch( *np ){
		case '+': onoff_flags(flags,np+1,1); break;
		case '-': onoff_flags(flags,np+1,0); break;
		case '=': wordscanX(np+1,flags,64); break;
	}
	/* "tp" may point into the same string as "fp": strcpy() is
	 * undefined for overlapping areas */
	memmove(tp,fp+1,strlen(fp+1)+1);
	return tp;
}

/*
 * Prefix "sel" in place with "(:G:)" where G is "gtype", unless
 * "toproxy" is nonzero and "gtype" is not '7'.
 * A white space "gtype" is replaced with '1'.
 */
static int put_gtype(sel,gtype,toproxy)
	char *sel;
	int gtype,toproxy;
{	char saved[URLSZ];

	if( toproxy && gtype != '7' )
		return 0;

	switch( gtype ){
		case ' ': case '\t': case '\r': case '\n':
			gtype = '1';
			break;
	}
	strcpy(saved,sel);
	sprintf(sel,"(:%c:)%s",gtype,saved);
	return 0;
}
/*
 * Determine the Gopher item type of the selector "gsel" and return it.
 *   "(:G:)selector"  yields G, and "selector" is copied to "sel".
 *   Otherwise the type is the first character of "gsel", where
 *     NL, CR, TAB or an empty string yields '1',
 *     a first word ending with '/' yields '1',
 *     a character out of "0123456789gIT" yields '9',
 *   and the whole "gsel" is copied to "sel".
 * "sel" may be NULL when the selector is not needed.
 */
int get_gtype(gsel,sel)
	char *gsel,*sel;
{	int gtype;
	char *wp,*ep;

	/* gsel[2] is tested so as not to read past the end of "(:" */
	if( gsel[0]=='(' && gsel[1]==':' && gsel[2]!=0
	 && gsel[3]==':' && gsel[4]==')' ){
		gtype = gsel[2];
		if( sel ) strcpy(sel,gsel+5);
	}else{
		gtype = gsel[0];

		/* locate the first word as sscanf("%s") did, but without a
		 * fixed size buffer and without reading anything when the
		 * string holds white space only */
		wp = gsel;
		while( isspace((unsigned char)*wp) )
			wp++;
		ep = wp;
		while( *ep != 0 && !isspace((unsigned char)*ep) )
			ep++;

		if( gtype=='\n' || gtype=='\r' || gtype=='\t' || gtype==0 )
			gtype = '1';
		else
		if( wp < ep && ep[-1] == '/' )
			gtype = '1';
		else
		if( !strchr("0123456789gIT",gtype) )
			gtype = '9';

		if( sel ) ovstrcpy(sel,gsel);
	}
	return gtype;
}


/*
 * Interpret a single modifier.
 *   "cc.X"  is passed to scan_CODECONV() with CCV_TOCL
 *   "cs.X"  is passed to scan_CODECONV() with CCV_TOSV
 *   "FX"    copies X to "flags"
 * Anything else is ignored.  Always returns 0.
 */
static int scan_modifier1(mod1,flags)
	char *mod1,*flags;
{
	if( strncmp(mod1,"cc.",3) == 0 ){
		scan_CODECONV(mod1+3,CCV_TOCL,1);
		return 0;
	}
	if( strncmp(mod1,"cs.",3) == 0 ){
		scan_CODECONV(mod1+3,CCV_TOSV,1);
		return 0;
	}
	if( mod1[0] == 'F' )
		strcpy(flags,mod1+1);
	return 0;
}
/*
 * Record the modifier list "mods" into "ctx" with CTX_set_modifires(),
 * then let scan_commaList() apply scan_modifier1() with "flags".
 * The returned value carries no information.
 */
static int scan_modifiers(ctx,mods,flags)
	void *ctx;
	char *mods,*flags;
{
	CTX_set_modifires(ctx,mods);
	scan_commaList(mods,0,scan_modifier1,flags);
	return 0;
}

/*
 * Decode a URL carrying NDGU_MARK.  "durl" points at the mark found in
 * "url" and "marklen" is the length of the mark.
 *
 * Modifiers are taken either from "MARK/modifiers/..." or from the text
 * preceding the mark ("(modifiers)MARK" or "/modifiers MARK"), copied to
 * "modifiers" when it is not NULL, and interpreted by scan_modifiers().
 * The protocol, host and port of the real server are stored to "proto",
 * "host" and "*iportp", and the text at "durl" is replaced in place with
 * the remaining part of the URL.
 *
 * Returns 1 when decoded, 0 when the mark is preceded by two or more
 * characters and "cproto" is "http", -1 when no protocol and site can
 * be scanned.
 *
 * In-place moves are done with memmove() because the areas overlap.
 * Modifiers which do not fit into the local buffer are truncated.
 * NOTE(review): the sizes of the buffers given by the caller are not
 * known here and are not checked.
 */
int CTX_url_dereferN(ctx,cproto,url,modifiers,flags,proto,host,iportp,durl,marklen)
	void *ctx;
	char *cproto,*url,*modifiers,*flags,*proto,*host;
	int *iportp;
	char *durl;
	int marklen;
{	char protob[URLSZ],port[URLSZ],urlh[URLSZ],*up;
	char modb[1024],*pb,*pp,ch,*np,gtype;
	int len,ni;

	if( durl[marklen] == '/' && durl[marklen+1] != '/' ){
		/* MARK/modifiers/... */
		pb = modb;
		for( pp = durl + marklen + 1; ch = *pp++; ){
			if( ch == ':' )
				break;
			if( ch == '/' )
				break;
			if( pb < &modb[sizeof(modb)-1] )
				*pb++ = ch;
			if( isspace((unsigned char)*pp) )
				break;
		}
		if( ch == '/' ){
			*pb = 0;
			if( modifiers != NULL )
				strcpy(modifiers,modb);

			scan_modifiers(ctx,modb,flags);
			memmove(durl+marklen,pp,strlen(pp)+1);
		}else{
			/* not modifiers: only remove the '/' after the mark */
			memmove(durl+marklen,durl+marklen+1,
				strlen(durl+marklen+1)+1);
		}
	}else
	if( url < durl ){
		/* modifiers precede the mark: collect them backward */
		modb[sizeof(modb)-1] = 0;
		pb = &modb[sizeof(modb)-1];

		if( durl[-1] == ')' ){
		    for( pp = durl - 2; url <= pp; pp-- ){
			if( *pp == '(' ){
				memmove(pp,durl,strlen(durl)+1);
				durl = pp;
				break;
			}
			if( modb < pb )
				*--pb = *pp;
		    }
		}else{
		    for( pp = durl - 1; url <= pp; pp-- ){
			if( *pp == '/' || isspace((unsigned char)*pp) ){
				memmove(pp+1,durl,strlen(durl)+1);
				durl = pp + 1;
				break;
			}
			if( modb < pb )
				*--pb = *pp;
		    }
		}

		if( pp = strstr(pb,"-.-") ){
			*pp = 0;
			strcpy(urlh,durl);
			sprintf(durl,"%s:///%s%s",NDGU_MARK,pp+3,urlh);
		}
		if( modifiers != NULL )
			strcpy(modifiers,pb);
		scan_modifiers(ctx,pb,flags);
	}

	if( &url[1] < durl && strcaseeq(cproto,"http") )
		return 0;

	np = durl + marklen;
	np = scan_flags(np,durl,flags);
	unescape_specials(np,":","//");

	port[0] = 0;
	if( strncmp(np,":///",4) == 0 ){
		/* no site is given: the target is this server itself */
		protob[0] = 0;
		strcpy(host,"localhost");
		*iportp = SERVER_PORT();
		memmove(durl,np+4,strlen(np+4)+1);
		return 1;
	}
	if( strncmp(np,"://",3) == 0 )
		memmove(np,np+1,strlen(np+1)+1);

	ni = scan_protositeport(np,protob,host,port);

	if( ni == 2 || ni == 3 ){
		strcpy(proto,protob);
		/* rebuild "proto://host[:port]" to know its length in "np" */
		up = urlh;
		up = Sprintf(up,"%s://%s",proto,host);
		if( proto[0] == 0 )
			strcpy(proto,cproto);
		if( ni == 2 )
			*iportp = serviceport(proto);
		else{	*iportp = atoi(port);
			up = Sprintf(up,":%s",port);
		}
		len = up - urlh;

		/* gopher://HP/G-_-gopher://...
		 * seems to no more be supported
		 */
		gtype = 0;
		/* skip "/Gtype" */
		if( streq(cproto,"gopher") && streq(proto,"gopher") ){
			if( np[len] == '/' ){
				len++;
				if( gtype = np[len] )
				if(strchr(endofHOSTPORT,gtype)==NULL){
					len++;
				}
			}
		}
		if( url < durl && durl[-1] == '/' && np[len] == '/' )
			len += 1;
		memmove(durl,np+len,strlen(np+len)+1);
		if( gtype )
			put_gtype(durl,gtype,0);
		return 1;
	}
	return -1;
}
/*
 * Decode a URL carrying ODGU_MARK.  "durl" points at the mark found in
 * "url" and "marklen" is the length of the mark.
 * The protocol, host and port of the real server are stored to "proto",
 * "host" and "*iportp", and the text at "durl" is replaced in place with
 * the part following the host and port.
 * Returns 1 when decoded; otherwise "proto" and "host" are emptied and
 * 0 is returned.
 *
 * In-place moves are done with memmove() because the areas overlap.
 * NOTE(review): the sscanf() conversions into "proto" and "host" are
 * not bounded; the sizes of these buffers are not known here.
 */
int CTX_url_dereferO(ctx,cproto,url,modifiers,flags,proto,host,iportp,durl,marklen)
	void *ctx;
	char *cproto,*url,*modifiers,*flags,*proto,*host;
	int *iportp;
	char *durl;
	int marklen;
{	char *hp,*np,gtype;
	int ni;

	np = durl + marklen;
	np = scan_flags(np,durl,flags);

	/*
	 *	Gopher		=@=gopher:H:P=Gtype
	 *		'Gtype' is used by Gopher/DeleGates who doesn't know
	 *		what type the requested infomation is.
	 *	Ftp/Gopher	=@=ftp:H:P=Gtype
	 *		'Gtype' may be used to determine whether P is a
	 *		directory or a flat file.
	 *	
	 */
	if( (ni = sscanf(np,"%[^:]:%[^:]:%d=%c",proto,host,iportp,&gtype)) == 4
	 || (ni = sscanf(np,"%[^:]:%[^=]=%c",   proto,host,&gtype)) == 3 )
	{
		if( ni == 3 )
			*iportp = serviceport(proto);
		if( hp = strpbrk(np+strlen(host),endofHOSTPORT) )
			memmove(durl,hp,strlen(hp)+1);
		put_gtype(url,gtype,0);
		return 1;
	}

	/*
	 *	Generic    =@=proto:H:P
	 */
	ni = sscanf(np,"%[^:]:%[^:/? \t\r\n]:%d",proto,host,iportp);
	if( 2 <= ni ){
		if( ni == 2 )
			*iportp = serviceport(proto);
		if( hp = strpbrk(np+strlen(host),endofHOSTPORT) )
			memmove(durl,hp,strlen(hp)+1);
		return 1;
	}

	/*
	 *	HTTP-Special /=@=:H:P
	 */
	ni = sscanf(np,":%[^:/? \t\r\n]:%d",host,iportp);
	if( 1 <= ni ){
		if( ni == 1 )
			*iportp = serviceport("http");
		if( hp = strpbrk(np+strlen(host),endofHOSTPORT) ){
			/* durl[-1] exists only when the mark is not at the
			 * very beginning of "url" */
			if( url < durl && durl[-1] == '/' && hp[0] == '/' )
				memmove(durl,hp+1,strlen(hp+1)+1);
			else	memmove(durl,hp,strlen(hp)+1);
		}
		return 1;
	}

	proto[0] = host[0] = 0;
	return 0;
}
/*
 * Decode a delegated URL in place.
 * A URL containing NDGU_MARK is given to CTX_url_dereferN() first.
 * When that does not settle the result (it returns -1, or the mark is
 * not found) and ENABLE_ODGU is nonzero, a URL containing ODGU_MARK is
 * given to CTX_url_dereferO().
 * Returns the code of the decoder used, or 0 when none applied.
 */
int CTX_url_derefer(ctx,cproto,url,modifiers,flags,proto,host,iportp)
	void *ctx;
	char *cproto,*url,*modifiers,*flags,*proto,*host;
	int *iportp;
{	char *durl;
	int rcode;

	/* the mark length is received as int by the unprototyped callees,
	 * so the size_t value of strlen() must not be passed as is */
	durl = strstr(url,NDGU_MARK);
	if( durl != NULL ){
		rcode = CTX_url_dereferN(ctx,cproto,url,modifiers,flags,
				proto,host,iportp,
				durl,(int)strlen(NDGU_MARK));
		if( rcode != -1 )
			return rcode;
	}

	if( !ENABLE_ODGU )
		return 0;

	durl = strstr(url,ODGU_MARK);
	if( durl != NULL ){
		rcode = CTX_url_dereferO(ctx,cproto,url,modifiers,flags,
				proto,host,iportp,
				durl,(int)strlen(ODGU_MARK));
		if( rcode != -1 )
			return rcode;
	}
	return 0;
}

/*
 * Rewrite in place a URL that begins (after unescaping) with ODGU_MARK
 * into "xproto://xhostport" followed by the path part of the rest.
 * "xproto" defaults to "http".
 * Returns 1 when rewritten.  Returns 0, leaving "urlxa" as it is, when
 * the mark is not at the top, when the values after the mark can not be
 * scanned, when "xhostport" is missing, or when the rest has no "://".
 * NOTE(review): the size of "urlxa" is not known here and the result
 * is not bounded.
 */
int url_undelegate(urlxa)
	char *urlxa;
{	char *tp,*up;
	int len;
	char xvalues[URLSZ];
	char *xav[64];
	int xac;
	char *xflags,*xproto,*xhostport,*xgtype;
	char *sp;
	char urlx[URLSZ];

	nonxalpha_unescape(urlxa,urlx,0);
	if( strncmp(urlx,ODGU_MARK,strlen(ODGU_MARK)) != 0 )
		return 0;

	tp = urlx;
	up = tp + strlen(ODGU_MARK);
	len = scan_urlx(up,xvalues);
	if( len <= 0 )
		return 0;

	xac = stoV(xvalues,64,xav,'\n');
	xflags    = getv(xav,"xflags");
	xproto    = getv(xav,"xproto");
	xhostport = getv(xav,"xhostport");
	xgtype    = getv(xav,"xtype");

	if( xhostport == 0 )
		return 0;

	/* drop the mark and the values; both pointers are in "urlx", so
	 * strcpy() would be undefined here */
	memmove(tp,up+len,strlen(up+len)+1);

	if( xproto == 0 )
		xproto = "http";

	sp = strstr(urlx,"://");
	if( sp == 0 )
		return 0;

	/* skip the host part of the rest */
	sp += 3;
	while( *sp && !strchr("/ \t\r\n\"",*sp) )
		sp++;

	if( sp[0] == '/' && sp[1] == '/' )
		sp += 1;

	sprintf(urlxa,"%s://%s",xproto,xhostport);
/* nonxalpha_escape(sp,urlxa+strlen()); */
	strcat(urlxa,sp);
	return 1;
}

/*
 *  site = user:pass@host:port
 *  site = [ [ user [ : pass ] @ ] hostport ]
 *  unreserved = A-Z a-z 0-9 $-_.!~*'(), 
 *  user = *( unreserved | escaped | ;&=+ )
 *  pass = *( unreserved | escaped | ;&=+ )
 */
/*
 * Copy the leading part of "url", up to a ':' or a white space, to
 * "scheme", storing at most size-1 characters and a terminating NUL.
 * Returns the pointer to the first character of "url" not copied.
 */
char *scan_URI_scheme(url,scheme,size)
	char *url,*scheme;
	int size;
{	unsigned char c;
	int n;

	n = 0;
	while( n + 1 < size ){
		c = (unsigned char)url[n];
		if( c == 0 || c == ':' || isspace(c) )
			break;
		scheme[n] = c;
		n++;
	}
	scheme[n] = 0;
	return url + n;
}
/*
 * Copy the leading characters of "url" which may appear in the site
 * part (user:pass@host:port) to "site".
 * "size" limits the part of "url" examined; 0 stands for 248, and it is
 * further limited by the size of the local buffer.
 * Returns the pointer just after the part of "url" copied.
 */
char *scan_URI_site(url,site,size)
	char *url,*site;
	int size;
{	char buff[512];
	int limit;

	if( size == 0 )
		size = 248; /* 7 bytes for :port-# ... */

	limit = size < (int)sizeof(buff) ? size : (int)sizeof(buff);
	Strncpy(buff,url,limit);

	site[0] = 0;
	sscanf(buff,"%[-.A-Za-z0-9:@%%$_!~*'(),;&=+#]",site);
	return url + strlen(site);
}
/*
 * Split "site" into "userpasshost" and "port".
 * With a '@' in "site", the port is what follows the first ':' after the
 * last '@', and "site" itself is cut at that ':'.
 * Without a '@', "site" is split at the first ':' by sscanf().
 * The returned value carries no information.
 */
int decomp_URL_site(site,userpasshost,port)
	char *site,*userpasshost,*port;
{	char *at,*colon;

	userpasshost[0] = 0;
	port[0] = 0;

	at = strrchr(site,'@');
	if( at == NULL ){
		sscanf(site,"%[^:]:%s",userpasshost,port);
		return 0;
	}

	colon = strchr(at,':');
	if( colon != NULL ){
		*colon = 0;
		strcpy(port,colon+1);
	}
	strcpy(userpasshost,site);
	return 0;
}
/*
 * Decompose "site" (user:pass@host:port) into all of its parts.
 *   userpass  the text before the last '@', as it is
 *   user,pass the same text split at its first ':' and passed through
 *             nonxalpha_unescape()
 *   hostport  the text after the last '@' (the whole "site" without '@')
 *   host,port "hostport" split at its first ':'
 * Parts which do not exist are set to empty strings.
 * The returned value carries no information.
 */
int decomp_URL_siteX(site,userpass,user,pass,hostport,host,port)
	char *site,*userpass,*user,*pass,*hostport,*host,*port;
{	char *at,*colon,*pw;

	strcpy(hostport,site);
	at = strrchr(hostport,'@');
	if( at == NULL ){
		*pass = 0;
		*user = 0;
		*userpass = 0;
	}else{
		*at = 0;
		strcpy(userpass,hostport);
		colon = strchr(hostport,':');
		if( colon != NULL ){
			*colon = 0;
			pw = colon + 1;
		}else	pw = "";
		nonxalpha_unescape(hostport,user,1);
		nonxalpha_unescape(pw,pass,1);
		ovstrcpy(hostport,at+1);
	}

	strcpy(host,hostport);
	colon = strchr(host,':');
	if( colon == NULL ){
		*port = 0;
	}else{
		*colon = 0;
		strcpy(port,colon+1);
	}

Verbose("S[%s] = UP[%s]U[%s]P[%s] + HP[%s]H[%s]P[%s]\n",
site, userpass,user,pass, hostport,host,port);
	return 0;
}

extern char *scan_userpassX();
/*
 * Take "user[:pass]@" off the top of "server".
 * With the '@', the unescaped user and password are stored to "user"
 * and "pass", and the pointer after the '@' is returned.
 * Without it, "user" is set to "dfltuser", "pass" is emptied and
 * "server" is returned.
 */
char *scan_url_userpass(server,user,pass,dfltuser)
	char *server,*user,*pass,*dfltuser;
{	char ub[128],wb[128],*at;
	AuthInfo ident;

	at = scan_userpassX(server,&ident);
	wordScan(ident.i_user,ub);
	textScan(ident.i_pass,wb);

	if( *at == '@' ){
		nonxalpha_unescape(ub,user,1);
		nonxalpha_unescape(wb,pass,1);
		return at + 1;
	}
	strcpy(user,dfltuser);
	pass[0] = 0;
	return server;
}
/*
 * Scan "proto://site" at the top of "url".
 * The scheme is stored to "proto" (emptied when "url" begins with '/'),
 * and the site is split into "userpasshost" and "port" with
 * decomp_URL_site().  A single '/' in place of "//" is accepted.
 * Returns 0 when no '/' follows the scheme, 2 when the site has no
 * port, 3 when it has one.
 */
int scan_protositeport(url,proto,userpasshost,port)
	char *url,*proto,*userpasshost,*port;
{	char *sp;
	char site[256];

	sp = url;
	if( *sp != '/' )
		sp = scan_URI_scheme(sp,proto,64);
	else	proto[0] = 0;
	if( *sp == ':' )
		sp++;

	if( strncmp(sp,"//",2) == 0 )
		sp += 2;
	else
	if( *sp == '/' )
		sp += 1; /* for IE4.0 */
	else	return 0;

	/* the size is received as int by the unprototyped callee, so a
	 * size_t must not be passed as is */
	scan_URI_site(sp,site,(int)sizeof(site));
	decomp_URL_site(site,userpasshost,port);

	if( *port == 0 )
		return 2;
	else	return 3;
}
/* Return serviceport() of the scheme at the top of "url". */
int url_serviceport(url)
	char *url;
{	char proto[32];

	/* the size is received as int by the unprototyped callee, so a
	 * size_t must not be passed as is */
	scan_URI_scheme(url,proto,(int)sizeof(proto));
	return serviceport(proto);
}

#define SITEC(c) ((c & 0x80) == 0 && 0x20 < c && c != '/' && c != '?')
#define PATHC(c) (c != '\r' && c != '\n')

static char *urlpathp;
decomp_absurl(url,proto,login,upath,ulen)
	char *url,*proto,*login,*upath;
{	char *up,*bp,buf[256],*ux;
	char *bx;
	unsigned char uc;
	
	up = url;
	urlpathp = 0;

	if( proto ) *proto = 0;
	if( login ) *login = 0;
	if( upath ) *upath = 0;

	bp = buf;
	/* causes bound-check error when "url" is a static string constant
	ux = up + 32 - 1;
	while( up < ux && (uc = *up) && uc != ':' ){ *bp++ = *up++; } *bp = 0;
	*/
	bx = bp + 32 - 1;
	while( bp < bx && (uc = *up) && uc != ':' ){ *bp++ = *up++; } *bp = 0;
	if( proto ) strcpy(proto,buf);
	if( *up++ != ':' ) return 0;
	if( *up++ != '/' ) return 1;
	if( *up++ != '/' ) return 1;

	bp = buf;
	ux = up + sizeof(buf) - 1;
	while( up < ux && (uc = *up) && SITEC(uc) ){ *bp++ = *up++; } *bp = 0;
	if( login ) strcpy(login,buf);
	urlpathp = up;
	if( *up == '?' ) ; else
	if( *up++ != '/' ) return 2;

	if( upath == 0 )
		return 3;
	bp = upath;
	ux = url + (ulen - 1);
	while( up < ux && (uc = *up) && PATHC(uc) ){ *bp++ = *up++; } *bp = 0;

	return 3;
}

/*
 * Remove "proto://login" from the top of "url" in place, leaving the
 * rest which is made to begin with '/'.
 * The scheme and the site are stored to "proto" and "login".
 * Returns the code of decomp_absurl(); "url" is changed only when the
 * code is 2 or greater.
 */
int strip_urlhead(url,proto,login)
	char *url,*proto,*login;
{	int ni;

	ni = decomp_absurl(url,proto,login,NULL,0);
	if( 2 <= ni ){
		if( *urlpathp != '/' ){
			/* "urlpathp" points into "url" behind "://", so
			 * url[0] can be set before the overlapping move;
			 * sprintf() is undefined for overlapping areas */
			url[0] = '/';
			ovstrcpy(url+1,urlpathp);
		}else	ovstrcpy(url,urlpathp);
	}
	return ni;
}

/*
 * Scan "user[:pass]@..." at "userpass" into "ident", which is cleared
 * first.  The scan covers the first line up to '/' or '?'.
 * ident->i_user receives the text before the last '@' cut at its first
 * ':', and ident->i_pass the text after that ':'.
 * Returns the pointer to the last '@' in "userpass", or to the end of
 * "userpass" when there is no '@'.
 */
char *scan_userpassX(userpass,ident)
	char *userpass;
	AuthInfo *ident;
{	char *cut,*at,*colon;

	bzero(ident,sizeof(AuthInfo));
	lineScan(userpass,ident->i_user);
	ident->i_pass[0] = 0;

	cut = strpbrk(ident->i_user,"/?\r\n");
	if( cut != NULL )
		*cut = 0;

	at = strrchr(ident->i_user,'@');
	if( at != NULL )
		*at = 0;

	colon = strchr(ident->i_user,':');
	if( colon != NULL ){
		*colon = 0;
		textScan(colon+1,ident->i_pass);
	}

	if( at == NULL )
		return &userpass[strlen(userpass)];
	return &userpass[at-ident->i_user];
}
extern char *wordscanY();
#define EOHN	"^:/? \t\r\n\f\""
/*
 * Decompose "site" (user:pass@host:port) into "ident".
 * The user and password are emptied when "site" has no '@'.
 * The port is the number after the host, or serviceport(proto) when no
 * port is given.  Returns the port stored in ident->i_Port.
 */
int decomp_siteX(proto,site,ident)
	char *proto,*site;
	AuthInfo *ident;
{	char *at,*tail;

	at = scan_userpassX(site,ident);
	if( *at != '@' ){
		ident->i_user[0] = 0;
		ident->i_pass[0] = 0;
	}else{
		site = at + 1;
	}

	tail = wordscanY(site,ident->i_Host,sizeof(ident->i_Host),EOHN);
	if( *tail == ':' && tail[1] != 0 )
		ident->i_Port = atoi(tail+1);
	else	ident->i_Port = serviceport(proto);
	return ident->i_Port;
}
/*
 * Remove the password from "user:pass@host..." in place, leaving
 * "user@host...".  "site" without '@' is left as it is.
 * The returned value carries no information.
 */
int site_strippass(site)
	char *site;
{	char *xp;
	AuthInfo ident;
	int ulen;

	xp = scan_userpassX(site,&ident);
	if( *xp == '@' ){
		/* "xp" points into "site": sprintf() must not be used with
		 * overlapping source and destination.  The user name is
		 * never longer than the text before the '@'. */
		ulen = strlen(ident.i_user);
		memmove(site,ident.i_user,ulen);
		memmove(site+ulen,xp,strlen(xp)+1);
	}
	return 0;
}
/*
 * Apply site_strippass() to the site part of "url", which is the text
 * after the first "://".  A "url" without "://" is left as it is.
 * The returned value carries no information.
 */
int url_strippass(url)
	char *url;
{	char *sep;

	sep = strstr(url,"://");
	if( sep == NULL )
		return 0;
	site_strippass(sep+3);
	return 0;
}
/*
 * Scan "host[:port]" at "hostport"; the host is stored to "host" whose
 * size is "hsiz".  Returns the port number, or serviceport(proto) when
 * the port is not given or is zero.
 */
int scan_hostportX(proto,hostport,host,hsiz)
	char *proto,*hostport,*host;
	int hsiz;
{	char *tail;
	int port;

	tail = wordscanY(hostport,host,hsiz,EOHN);
	port = (*tail == ':') ? atoi(tail+1) : 0;
	if( port != 0 )
		return port;
	return serviceport(proto);
}
scan_hostport1X(hostport,host,hsiz)
	char *hostport,*host;
{	char ch,*sp,*dp;
	char *pp;
	char *xp;
	int port;

	xp = &host[hsiz-1];
	dp = host;
	port = 0;
	pp = 0;

	for( sp = hostport; ch = *sp; sp++ ){
		if( xp <= dp )
			break;
		else
		switch( ch ){
		    case '/': case '?':
		    case ' ': case '\t': case '\r': case '\n': case '\f':
			goto EXIT;

		    case ':':
			/* might be one in "user:pass@host" */
			port = atoi(sp+1);
			pp = dp;
			break;

		    case '@':
			dp = host;
			port = 0;
			pp = 0;
			break;

		    default:
			if( (ch & 0x80) || ch <= 0x20 )
				goto EXIT;
			if( dp != sp )
				*dp++ = ch;
			break;
		}
	}
EXIT:
	if( pp ) *pp = 0;
	if( *dp != 0 )
		*dp = 0;
	return port;
}
/*
 * The same as scan_hostport1X(), but returns serviceport(proto) when
 * no port is found in "login".
 */
int scan_hostport1pX(proto,login,host,hsiz)
	char *proto,*login,*host;
	int hsiz;
{	int port;

	port = scan_hostport1X(login,host,hsiz);
	if( port != 0 )
		return port;
	return serviceport(proto);
}
/*
 * Copy the host name at the top of "hostport" to "host".  The name ends
 * at ':', '/', a space, TAB, CR, LF or the end of the string.
 * Returns the number following the ':', or 0 when there is no ':'.
 */
int scan_hostport0(hostport,host)
	char *hostport,*host;
{	char *src,*dst;
	int port;

	port = 0;
	dst = host;
	src = hostport;
	while( *src != 0 ){
		switch( *src ){
		    case ':':
			port = atoi(src+1);
			goto done;
		    case '/': case ' ': case '\t': case '\r': case '\n':
			goto done;
		}
		*dst++ = *src++;
	}
done:
	*dst = 0;
	return port;
}
/*
 * The same as scan_hostport0(), but returns serviceport(proto) when no
 * port, or the port 0, is given in "hostport".
 */
int scan_hostport(proto,hostport,host)
	char *proto,*hostport,*host;
{	int port;

	port = scan_hostport0(hostport,host);
	if( port != 0 )
		return port;
	return serviceport(proto);
}

/*
 *	EXPAND PARTIAL HTTP-URL TO FULL SPEC URL:
 *	Absolute path in URL which have no http://H:P should be expanded to
 *	full description of URL, that is with http://HOST:PORT.
 *	Relative path will be expanded with http:H:P in the HTTP clients.
 */
/*
 * Make "host:port" into "hostport", omitting ":port" when "port" is
 * equal to serviceport(proto).  Returns "hostport".
 */
char *HostPort(hostport,proto,host,port)
	char *hostport,*proto,*host;
	int port;
{
	if( serviceport(proto) == port )
		strcpy(hostport,host);
	else	sprintf(hostport,"%s:%d",host,port);
	return hostport;
}

#define isSchemeChar(ch)	(isalnum(ch)||(ch)=='+'||(ch)=='-'||(ch)=='.')

/*
 * Return 1 when "url" is regarded as a full URL:
 * a scheme followed by "://", a scheme accepted by localPathProto()
 * followed by ':' and a path accepted by isFullpath(), or a URL of
 * "news:" or "mailto:".  Return 0 otherwise.
 * Only the leading 32 characters are examined as the scheme.
 */
int isFullURL(url)
	char *url;
{	char scheme[128],*up,ch;
	int n;

	if( !isSchemeChar(url[0]) )
		return 0;

	n = 0;
	up = url;
	while( isSchemeChar(ch = *up) ){
		if( 32 <= n )
			break;
		scheme[n++] = ch;
		up++;
	}
	scheme[n] = 0;

	if( up[0] != ':' )
		return 0;

	if( up[1] == '/' && up[2] == '/' )
		return 1;
	if( localPathProto(scheme) && isFullpath(&up[1]) )
		return 1;
	if( streq(scheme,"news") )
		return 1;
	if( streq(scheme,"mailto") )
		return 1;
	return 0;
}

extern char *CTX_changeproxy_url_to();
/*
 * Try CTX_changeproxy_url_to() with "url" as it is.  When that gives
 * nothing and "url" is '/' followed by NDGU_MARK or ODGU_MARK, try
 * again with the text after the mark.  Returns NULL when neither
 * applies.
 */
char *CTX_changeproxy_url(ctx,clif,method,url,proxy)
	void *ctx;
	char *clif,*method,*url,*proxy;
{	char *opt,*mark;

	opt = CTX_changeproxy_url_to(ctx,clif,method,url,proxy);
	if( opt )
		return opt;

	if( url[0] != '/' )
		return NULL;

	mark = NDGU_MARK;
	if( !(mark && strncmp(url+1,mark,strlen(mark)) == 0) ){
		mark = ODGU_MARK;
		if( !(mark && strncmp(url+1,mark,strlen(mark)) == 0) )
			return NULL;
	}
	return CTX_changeproxy_url_to(ctx,clif,method,url+1+strlen(mark),proxy);
}

/*
 * Return the length of the directory part of the URL path "base" of
 * "blen" bytes: the length up to and including the last '/' which
 * appears before any '?'.  Returns 0 when there is no such '/'.
 * The result is logged when it differs from "blen".
 */
int url_upathbaselen(base,blen)
	char *base;
	int blen;
{	char *slash;
	int bi,nblen;

	slash = NULL;
	for( bi = 0; bi < blen; bi++ ){
		if( base[bi] == '?' )
			break;
		if( base[bi] == '/' )
			slash = &base[bi];
	}

	nblen = (slash == NULL) ? 0 : (slash+1) - base;
	if( nblen != blen ) 
		sv1vlog("URL BASE = %d/%d [%s]\n",nblen,blen,base);
	return nblen;
}
/*
 * Replace the server side base in "referer" with the one given by
 * "url", which is the value of HREF in a <BASE> tag.
 * "url" is parsed with scan_url1(); the protocol, host, path and the
 * directory part of the path are stored one after another into
 * referer->r_altbuf and pointed to from referer->r_sv, and the port is
 * stored to referer->r_sv.u_port.
 * Nothing is done when r_altbuf is NULL or when "url" can not be parsed.
 * NOTE(review): the size of r_altbuf is not visible here and is not
 * checked.
 */
static setBASE(referer,url)
	Referer *referer;
	char *url;
{	char values[URLSZ],*av[64],*v1,*ap,*dp;
	int len;
	char burl[URLSZ];

	if( referer->r_altbuf == NULL )
		return;

	if( (len = scan_url1(url,values)) <= 0 )
		return;

	/* "len" is the length of the URL in "url"; it is copied only to
	 * be logged */
	strncpy(burl,url,len);
	burl[len] = 0;
	sv1log("<BASE HREF=%s>\n",burl);

	stoV(values,64,av,'\n');
	ap = referer->r_altbuf;
	if( v1 = getv(av,"proto")){
		referer->r_sv.u_proto = ap;
		strcpy(ap,v1);
		ap += strlen(ap) + 1;
	}
	if( v1 = getv(av,"host") ){
		referer->r_sv.u_host = ap;
		strcpy(ap,v1);
		ap += strlen(ap) + 1;
	}
	if( v1 = getv(av,"port") )
		referer->r_sv.u_port = atoi(v1);

	if( v1 = getv(av,"path") ){
		referer->r_sv.u_path = ap;
		strcpy(ap,v1);
		ap += strlen(ap) + 1;

{
		int blen;
		/* the base is the path up to its last '/' */
		referer->r_sv.u_base = ap;
		blen = url_upathbaselen(v1,strlen(v1));
		bcopy(v1,ap,blen);
		ap[blen] = 0;
		ap += blen + 2;
}
/*
		strcpy(ap,referer->r_sv.u_path);
		if( dp = strrchr(ap,'/') )
			dp[1] = 0;
		else	ap[0] = 0;
		ap += strlen(ap) + 1;
*/
	}
}
/*
 * Fetch the base information from "referer".
 * "*myhp" is the host:port of r_vb when r_vb has a protocol, or that of
 * r_my otherwise.  The others are taken from r_sv.
 * The returned value carries no information.
 */
static int getBASE(referer,myhp,proto,hostport,host,port,base)
	Referer *referer;
	char **myhp,**proto,**hostport,**host,**base;
	int *port;
{
	*myhp = referer->r_vb.u_proto
		? referer->r_vb.u_hostport
		: referer->r_my.u_hostport;

	*proto    = referer->r_sv.u_proto;
	*hostport = referer->r_sv.u_hostport;
	*host     = referer->r_sv.u_host;
	*port     = referer->r_sv.u_port;
	*base     = referer->r_sv.u_base;
	return 0;
}

/*
 * Compare "absurl" with "baseurl" from their tops.  When they are equal
 * up to and including the third '/', "relurl" is made empty; otherwise
 * "absurl" is copied to "relurl" as it is.
 * The returned value carries no information.
 */
int url_relative(relurl,absurl,baseurl)
	char *relurl,*absurl,*baseurl;
{	char *ap,*bp;
	int slashes;

	slashes = 0;
	ap = absurl;
	bp = baseurl;
	while( *ap != 0 && *bp != 0 && *ap == *bp ){
		if( *ap == '/' && ++slashes == 3 )
			break;
		ap++;
		bp++;
	}

	if( slashes == 3 )
		relurl[0] = 0;
	else	strcpy(relurl,absurl);
	return 0;
}

#define UREND(ch)	(ch=='"' || ch=='>' || isspace(ch) || ch=='\0')
#define CURDIR(u)	(u[0]=='.' && (u[1]=='/' || UREND(u[1])) ? &u[1] : 0)
#define UPDIR(u)	(u[0]=='.' ? CURDIR((&u[1])) : 0)

urlpath_normalize(url,rurl)
	unsigned char *url,*rurl;
{	unsigned char *up,*np,uc;
	unsigned char *xp;
	int norm;

	up = url;
	xp = rurl;
	norm = 0;

	while( uc = *up ){
		/*
		 * up points to the top of a URL element
		 */
		if( uc == '/' ){
			if( xp != up )
				*xp++ = uc;
			else	xp++;
			uc = *++up;
		}
		if( uc == '?' || UREND(uc) ){
			if( xp != up ){
				strcpy(xp,up);
				xp += strlen(xp);
			}
			break;
		}
		if( np = CURDIR(up) ){
			norm++;
			if( *np == '/' )
				np++;
			up = np;
			continue;
		}
		if( np = UPDIR(up) ){
			norm++;
			if( *np == '/' )
				np++;
			up = np;
			if( rurl < xp )
				xp--;
			while( rurl < xp ){
				if( *--xp == '/' ) 
					break;
			} 
			if( *xp == '/')
				xp++;
			*xp = 0;
			continue;
		}

		/*
		 * skip to the top of the next URL element
		 */
		while( uc = *up ){
			if( uc == '/' || uc == '?' || UREND(uc) )
				break;
			if( xp != up )
				*xp++ = *up++;
			else{
				xp++;
				up++;
			}
		}
	}
	if( xp != up )
		*xp = 0;

	return xp != up;
}

/*
 * care an abnormal pointer to outer space of the server ...
 * care only "../" at the top of URL to make the normalization be light weight
 */
/*
 * Resolve the "./" and "../" at the top of the relative "url" against
 * the directory path "base".
 *   *blen  is set to the length of the leading part of "base" to be kept
 * Returns the number of the leading characters of "url" to be skipped.
 * When "url" begins with neither "." nor "..", "*blen" is the whole
 * length of "base" and 0 is returned.
 * A ".." which goes beyond the top of "base" is reported with Verbose()
 * when LOG_VERBOSE is set.
 */
url_normalize(base,url,blen)
	char *base,*url;
	int *blen;
{	char *up,*bp,*xp;
	int nup,abu;

	if( !CURDIR(url) && !UPDIR(url) ){
		*blen = strlen(base);
		return 0;
	}

	bp = base + strlen(base);
	up = url;
	nup = 0;
	abu = 0;
	/* skip a leading "." */
	if( xp = CURDIR(up) ){
		if( *xp != '/' )
			up = xp;
		else	up = xp + 1;
	}
	/* for each leading "..", move "bp" back to the previous '/' */
	while( xp = UPDIR(up) ){
		nup++;
		if( bp == base )
			abu = 1;
		else{
			while( base < bp )
				if( *--bp == '/' )
					break;
		}
		if( *xp != '/' ){
			up = xp;
			break;
		}else{
			up = xp + 1;
		}
	}
	/* one more step back is made when any ".." was seen */
	if( nup ){
		if( bp == base )
			abu = 1;
		else{
			while( base < bp )
				if( *--bp == '/' )
					break;
		}
	}
	if( abu && LOG_VERBOSE ){
		char ub[32];
		Strncpy(ub,url,16);
		Verbose("ABNORMAL-URL: base<%s> url<%s>\n",base,ub);
	}
	*blen = bp - base;
	return up - url;
}

/*
 * Copy "line" to "xline", renaming the tags which are to be disabled.
 * Each position reported by html_nextTagAttr() with TAGCONV_KILL or
 * TAGCONV_JAVA in its conversion mask is examined; the name of the tag
 * is replaced with "killed-NAME" when TAGCONV_KILL is set and the tag
 * has a name, or when the tag is APPLET, OBJECT or EMBED, either as a
 * begin tag or as an end tag.
 * "uconvs" is the conversion mask given to html_nextTagAttr().
 * Returns the number of the positions examined.
 */
int java_conv(line,xline,uconvs)
	char *line,*xline;
	int uconvs;
{	int uconv,nconv;
	char *sp,*np,*xp;
	char *tagp;
	int len;
	char *tag;
	char tagb[32],*tp;

	sp = line;
	xp = xline;
	for( nconv = 0; ; nconv++ ){
		uconv = uconvs;
		np = html_nextTagAttr(sp,"",NULL,&tagp,NULL,&uconv);
		if( np == NULL )
			break;

		if( (uconv & (TAGCONV_KILL|TAGCONV_JAVA)) == 0 )
			break;

		/* this was "tagb[0] == 0;", a comparison with no effect */
		tagb[0] = 0;
		tag = NULL;
		if( tagp != NULL ){
			tp = tagp;
			if( *tp == '<' ){
				tp++;
				if( *tp == '/' )
					tp++;
				wordscanY(tp,tagb,sizeof(tagb),"^ \t\r\n>");
				tag = tagb;
			}
		}

		if( tagp != NULL ){
			if( (uconv & TAGCONV_KILL) && tagb[0] ){
			}else
			if( strncasecmp(tagp,"</APPLET",8) == 0 ) tag = "APPLET"; else
			if( strncasecmp(tagp,"<APPLET", 7) == 0 ) tag = "APPLET"; else
			if( strncasecmp(tagp,"</OBJECT",8) == 0 ) tag = "OBJECT"; else
			if( strncasecmp(tagp,"<OBJECT", 7) == 0 ) tag = "OBJECT"; else
			if( strncasecmp(tagp,"</EMBED", 7) == 0 ) tag = "EMBED";  else
			if( strncasecmp(tagp,"<EMBED",  6) == 0 ) tag = "EMBED";  else
			{
				if( TAGTRACE )
				sv1log("## TAG NOMATCH %s\n",tagp);
				tagp = NULL;
			}
		}

		if( tagp == NULL ){
			/* not to be killed: copy through as it is */
			len = np - sp;
			bcopy(sp,xp,len);
			xp += len;
			sp = np;
			continue;
		}

		sv1log("## TAG %s -> killed-%s\n",tag,tag);
		/* copy up to and including the '<' of the tag */
		len = tagp+1 - sp;
		bcopy(sp,xp,len);
		xp += len;
		sp = tagp+1;
		*xp = 0;

		if( *sp == '/' ){
			sp += 1;
			*xp++ = '/';
		}
		sp += strlen(tag);
		sprintf(xp,"killed-%s",tag);
		xp += strlen(xp);
	}
	strcpy(xp,sp);
	return nconv;
}

/* url_movport() does nothing unless this is nonzero. */
int url_unify_ports = 0;
/* Mark put before a port number moved by url_movport(); it is searched
 * for and removed by url_delport(). */
#define PORT_MARK	"-.-P"
/*
 * Remove PORT_MARK and the digits following it from "url" in place,
 * and store the number to "*portp".
 * Nothing is changed when "url" does not contain the mark.
 * The returned value carries no information.
 */
int url_delport(url,portp)
	char *url;
	int *portp;
{	char *dp,*tail,port[32];

	if( dp = strstr(url,PORT_MARK) ){
		wordscanY(dp+4,port,sizeof(port),"0123456789");
		*portp = atoi(port);
		/* the areas overlap, for which strcpy() is undefined */
		tail = dp+4+strlen(port);
		memmove(dp,tail,strlen(tail)+1);
	}
	return 0;
}
/* scan pattern handed to wordscanY() to pick up one URL: everything up
 * to white space, a quote or '>'
 */
#define EOURL	"^ \t\r\n\"'>"
/*
 *	Copy the URL at the head of "url" into "vurl" (at most "siz" as
 *	limited by wordscanY()) and, when its authority part carries an
 *	explicit ":port", move that port to the end of the path as
 *	PORT_MARK<port>, in front of any '?' or '#'.
 *	Returns the length of the URL as scanned from "url" when a ':'
 *	follows the host (even if no digits follow it), otherwise sets
 *	"vurl" to "" and returns 0.  Returns 0 without touching "vurl"
 *	when url_unify_ports is off.
 *	NOTE(review): the rewritten URL is strlen(PORT_MARK)-1 bytes longer
 *	than the scanned one, so "vurl" should have that much room beyond
 *	"siz" -- confirm against the callers' buffers.
 */
url_movport(url,vurl,siz)
	char *url,*vurl;
	int siz;
{	char *dp,*tail,port[32];
	/* room for the marker plus the longest port[] content */
	char xport[sizeof(PORT_MARK)+sizeof(port)];
	int ilen = 0;

	if( !url_unify_ports )
		return 0;

	wordscanY(url,vurl,siz,EOURL);
	if( dp = strstr(vurl,"://") )
	if( dp = strpbrk(dp+3,":/? \t\r\n\"'") )
	if( *dp == ':' ){
		ilen = strlen(vurl);
		wordscanY(dp+1,port,sizeof(port),"0123456789");
		if( port[0] ){
			sprintf(xport,"%s%s",PORT_MARK,port);

			/* drop ":port"; the areas overlap, so strcpy() is
			 * not usable here
			 */
			tail = dp + 1 + strlen(port);
			bcopy(tail,dp,strlen(tail)+1);

			/* insert the marker before the query/fragment */
			if( dp = strpbrk(vurl,"?#") )
				;
			else	dp = vurl + strlen(vurl);
			Strins(dp,xport);
		}
	}
	if( ilen == 0 )
		*vurl = 0;
	return ilen;
}
/*
 *	MAKE URL REFERENCES IN "line" ABSOLUTE, WRITING THE RESULT TO "xline"
 *	The line is walked with html_nextTagAttr(); the text between
 *	references is copied as is.  At each reference (np) at most one of
 *	the branches below writes a replacement prefix at xp and advances sp
 *	over the part of the original reference that the prefix replaces.
 *	The base (myhp, proto, hp, host, port, base) is taken from "referer"
 *	via getBASE() and refreshed when a <BASE tag is met and
 *	referer->r_altbuf is set.
 *	"rem" is passed through to html_nextTagAttr() untouched.
 *	No bound of "xline" is checked and no value is returned.
 */
url_absoluteS(referer,line,xline,rem)
	Referer *referer;
	char *line,*xline,*rem;
{	char *myhp;
	char *proto;
	char *host;
	int   port;
	char *base;
	char *hp,hostportb[256];
	char *sp,*np,*xp;
	int ch;
	char *tagp;
	int uconv;

	getBASE(referer,&myhp,&proto,&hp,&host,&port,&base);

	sp = line;
	xp = xline;

	for(;;){
		uconv = URICONV_ANY;
		np = html_nextTagAttr(sp,"",rem,&tagp,NULL,&uconv);
		if( np == NULL )
			break;

		/* a <BASE tag replaces the base used for the rest of the line */
		if( referer->r_altbuf != NULL && tagp != NULL && isBASE(tagp) ){
			setBASE(referer,np);
			getBASE(referer,&myhp,&proto,&hp,&host,&port,&base);
		}

		/* copy sp..np by temporarily terminating the input at np;
		 * xp is left pointing at the terminating NUL, so "*xp" stays
		 * zero unless one of the branches below writes a prefix
		 */
		ch = np[0];
		np[0] = 0;
		strcpy(xp,sp);
		xp += strlen(xp);
		np[0] = ch;
		sp = np;

		/* "-.-" as the host stands for this server (myhp) */
		if( strncasecmp(np,"nntp://-.-/",11) == 0 ){
			sp += 11;
			sprintf(xp,"nntp://%s/",myhp);
		}else
		if( strncasecmp(np,"http://-.-/",11) == 0 ){
			sp += 11;
			sprintf(xp,"http://%s/",myhp);
		}else
		if( isFullURL(np) ){
			/* left as it is */
		}else
		if( strncasecmp(np,"http:/",6) == 0 ){
		    /* "http:/path" with a single slash: supply the base host:port */
		    if( np[6] != '/' ){
			sp += 6;
			HostPort(hostportb,"http",host,port);
			sprintf(xp,"http://%s/",hostportb);
		    }
		}
		else
		if( ch != '/' && streq(proto,"ftp") )
		{
			/* Relay ftp to the proxy server for a non-proxy client,
			 * which sees the current protocol as HTTP and thus will
			 * not expand a relative ftp URL automatically.
			 */
			if( ch == '.' && np[1] == '/' )
				sp += 2;
			strcpy(xp,base);
		}
		else
		if( ch == '/' && np[1] != '/' ){
			/* Absolute path without host:port.  This causes the
			 * =@=:realhost:realport part of the current page's URL
			 * to be ignored.
			 */
			sp += 1;
			sprintf(xp,"%s://%s/",proto,hp);
		}
		else
		if( ch == '/' && np[1] == '/' ){ /* with host:port */
			sp += 2;
			sprintf(xp,"%s://",proto);
		}
		else
		if( uconv & (URICONV_FULL|URICONV_NORMAL) ){
			int uplen,blen;

/*
			if( *base == '/' ) base++;
*/
			/* uplen: bytes to skip at the head of the reference,
			 * blen: leading bytes of base to keep
			 */
			uplen = url_normalize(base,sp,&blen);

			if( (uconv & URICONV_FULL) || uplen ){
				sprintf(xp,"%s://%s/",proto,hp);
				sp += uplen;
				if( 0 < blen ){
					/* append base[0..blen) and make sure
					 * it ends with a '/'
					 */
					xp += strlen(xp);
					bcopy(base,xp,blen);
					if( xp[blen-1] != '/' )
						strcpy(&xp[blen],"/");
					else	xp[blen] = 0;
				}
			}
		}
		if( url_unify_ports ){
			if( *xp ){
				/* A prefix was generated: temporarily append
				 * the rest of the reference (at tp) to form a
				 * whole URL, then let url_movport() relocate
				 * its port.  On failure the appended part is
				 * cut off again.
				 * NOTE(review): "up" is presumably the position
				 * following the scanned word -- confirm with
				 * wordscanY().
				 */
				char nb[512],*tp,*up;
				if( strncasecmp(xp,"http://",7) == 0 ){
					tp = xp + strlen(xp);
					up = wordscanY(sp,tp,256,EOURL);
					if( url_movport(xp,nb,sizeof(nb)) ){
						strcpy(xp,nb);
						sp = up;
					}else	*tp = 0;
				}
			}else{
				/* nothing generated: rewrite the original
				 * reference directly into xp
				 */
				int ilen;
				if( strncasecmp(np,"http://",7) == 0 ){
					if( ilen = url_movport(np,xp,256) )
						sp += ilen;
				}
			}
		}
		xp += strlen(xp);
	}
	/* the remainder following the last reference */
	strcpy(xp,sp);
}
/*
 *	Front end of url_absoluteS() for callers that have the base as
 *	separate values: a temporary Referer is filled with them (with no
 *	alternative buffer, r_altbuf = NULL) and handed over together with
 *	"line", "xline" and "rem".
 */
url_absolute(myhp,proto,host,port,base,line,xline,rem)
	char *myhp,*proto,*host,*base,*line,*xline,*rem;
{	char svhp[128];
	Referer refb,*rp;

	rp = &refb;
	rp->r_altbuf = NULL;

	/* this server */
	rp->r_my.u_hostport = myhp;

	/* the origin server of the page */
	rp->r_sv.u_proto = proto;
	rp->r_sv.u_host = host;
	rp->r_sv.u_port = port;
	rp->r_sv.u_base = base;
	rp->r_sv.u_hostport = HostPort(svhp,proto,host,port);

	url_absoluteS(rp,line,xline,rem);
}

/*
 *	TRANSFORM URL TO delegated-URL
 *	This function assumes that the URLs in "line" are in FULL-SPEC
 *	format, without omission of the protocol-name or the host-port field.
 */
/*
 *	Referer based front end of CTX_url_delegate().
 *	The delegate side of the rewriting is described by referer->r_vb
 *	when its u_proto is set, by referer->r_my otherwise.
 */
CTX_url_delegateS(ctx,referer,src,dst,dgrelay)
	void *ctx;
	Referer *referer;
	char *src,*dst;
	char *dgrelay;
{	Urlx *self;

	self = referer->r_vb.u_proto ? &referer->r_vb : &referer->r_my;
	CTX_url_delegate(ctx,src,dst,
		self->u_proto,self->u_host,self->u_port,self->u_path,
		dgrelay);
}
/*
 *	Copy "line" to "xline", replacing each URL reference found by
 *	html_nextTagAttr() with the delegated form made by CTX_url_rurl()
 *	for the delegate myproto://myhost:myport/mypath.
 *	A reference is left as it is when CTX_url_rurl() yields nothing or
 *	when the text right after it already starts with ODGU_MARK or
 *	NDGU_MARK.  No bound of "xline" is checked.
 */
CTX_url_delegate(ctx,line,xline,myproto,myhost,myport,mypath,dgrelay)
	void *ctx;
	char *line,*xline;
	char *myproto,*myhost;
	int myport;
	char *mypath;
	char *dgrelay;
{	char *src,*ref,*out;
	URLStr rewritten;
	int consumed;
	int saved;
	int mask;

	out = xline;
	for( src = line; ; ){
		mask = URICONV_ANY & ~(URICONV_FULL | URICONV_PARTIAL);
			/* should be URICONV_MOUNT ? */
		ref = html_nextTagAttr(src,"",NULL,NULL,NULL,&mask);
		if( ref == NULL )
			break;

		/* flush the text in front of the reference; the input is
		 * terminated at the reference only during the copy
		 */
		saved = *ref;
		*ref = 0;
		strcpy(out,src);
		out += strlen(out);
		*ref = saved;
		src = ref;

		consumed = CTX_url_rurl(ctx,ref,rewritten,myproto,myhost,myport,mypath,dgrelay);
		if( consumed == 0 )
			continue;
		if( strncmp(src+consumed,ODGU_MARK,strlen(ODGU_MARK)) == 0 )
			continue;
		if( strncmp(src+consumed,NDGU_MARK,strlen(NDGU_MARK)) == 0 )
			continue;

		/* substitute the rewritten URL for the original one */
		strcpy(out,rewritten);
		out += strlen(out);
		src += consumed;
	}
	strcpy(out,src);
}
/*
 *	Call (*func)(url,arg1,arg2) for each URL reference in "line".
 *	A reference starts where html_nextTagAttr() points and ends in
 *	front of the first white space, '"' or '>'; it is NUL terminated
 *	only while "func" runs.  Scanning ends at the first reference that
 *	has no such terminator.
 */
scan_url(line,func,arg1,arg2)
	char *line;
	int (*func)();
	char *arg1,*arg2;
{	char *cur,*ref,*end;
	char saved;

	for( cur = line; ; cur = end ){
		ref = html_nextTagAttr(cur,"",NULL,NULL,NULL,NULL);
		if( ref == NULL )
			break;

		end = strpbrk(ref," \t\r\n\">");
		if( end == NULL )
			break;

		saved = *end;
		*end = 0;
		(*func)(ref,arg1,arg2);
		*end = saved;
	}
}


/*
 *	delegated-URL SYNTHESIZER
 *	The given "attrs" is a NL-separated list of NAME=VALUEs.  This is the
 *	output format of the URL parser in the SLL library.
 */

/*
 *	Build into "url" the delegated form of the URL whose parsed
 *	attributes are given in "attrs" (NAME=VALUE lines) and whose
 *	original text is the first "olen" bytes of "ourl":
 *
 *	    dproto://delegate/ NDGU_MARK [ /modifiers/ ] original-URL
 *
 *	Returns 0 when nothing is to be rewritten, otherwise a non-NULL
 *	pointer: the end of "url" after a successful mount conversion, or
 *	the value handed back by the last Sprintf().
 *	NOTE(review): Sprintf() presumably returns the position following
 *	the text it wrote -- confirm with its definition.
 */
static char *delegate_url(ctx,url,attrs,ourl,olen,dgrelay)
	void *ctx;
	char *url;
	char *attrs;
	char *ourl;
	char *dgrelay;
{	URLStr abuf;
	char *av[64]; int ac;
	char *up;
	char *proto,*val;
	char *hostport,*delegate;
	char *dproto;
	char *path,xpath[URLSZ];
	char *search;
	char *gselector;
	char  oURLbuf[URLSZ];
	char *modifiers;

	/* split a private copy of attrs into a vector of NAME=VALUE */
	strcpy(abuf,attrs);
	ac = stoV(abuf,64,av,'\n');

	proto = getv(av,"proto");
	dproto = getv(av,"dproto");
	delegate = getv(av,"delegate");
	if( delegate == 0 )
		return 0;
	hostport = getv(av,"hostport");
	if( hostport == NULL ) hostport = getv(av,"host");
	path = getv(av,"path");
	search = getv(av,"search");

/* MOUNT is tried first, before any of the relay conditions below.
 * NOTE(review): on success the result is presumably left in "url" -- confirm.
 */
if( CTX_mount_url_fromL(ctx,url,proto,hostport,path,search,dproto,delegate) )
return url + strlen(url);

	if( dgrelay == NULL )
		return 0;

	if( proto == 0 )
		return 0;

	if( callback_it(proto) == 0 )
		return 0;

	/* the delegate is reached by HTTP unless told otherwise */
	if( dproto == NULL )
		dproto = "http";

	if( hostport == 0 )
		return 0;

	/* news: and telnet: URLs are never rewritten */
	if( streq(proto,"news") )
		return 0;
	if( streq(proto,"telnet") )
		return 0;

/*
	if( !isRELAYABLE(dgrelay,proto,hostport) )
		return 0;
*/
	if( !isREACHABLE(proto,hostport) )
		return 0;

	if( streq(proto,dproto) )
	if( delegate && hostport && streq(delegate,hostport) )
		return 0; /* no rewriting is necessary */

	/* NOTE(review): "path" is not used below except through av[] */
	if( path && nonxalpha_unescape(path,xpath,1) )
		path = xpath;

	gselector = 0;

	/* keep the original URL text; "url" is overwritten from here on */
	strncpy(oURLbuf,ourl,olen);
	oURLbuf[olen] = 0;

	up = url;
	up = Sprintf(up,"%s://",dproto);

	/* gopher relayed by gopher: restart from the head of "url" and
	 * remember the selector, whose first character is put after '/'
	 */
	if( !GOPHER_ON_HTTP && streq(proto,"gopher") ){
		up = Sprintf(url,"gopher://");
		gselector = getv(av,"path");
		if( gselector == 0 || *gselector == 0 )
			gselector = "1";
	}

	up = Sprintf(up,"%s",delegate);
	if( gselector )
		up = Sprintf(up,"/%c",*gselector);
	else	up = Sprintf(up,"/");

	/* the original already starts with the prefix built so far */
	if( strncmp(ourl,url,strlen(url)) == 0 ){
		/* is this right ?  doesn't it suppress necessary one ? */
		/*Verbose("####### DON'T MAKE DUPLICATE REWRITE: %s\n",url);*/
		return 0;
	}

modifiers = CTX_get_modifires(ctx);
/*
if( modifiers[0] && up[-1] == '/' )
	up = Sprintf(up,"%s",modifiers);
else
if( DELEGATE_FLAGS[0] )
if( up[-1] == '/' )
	up = Sprintf(up,"F%s",DELEGATE_FLAGS);
else	up = Sprintf(up,"(F%s)",DELEGATE_FLAGS);
*/

	up = Sprintf(up,"%s",NDGU_MARK);

if( modifiers[0] )
up = Sprintf(up,"/%s/",modifiers);

	up = Sprintf(up,"%s",oURLbuf);
	return up;
}
/*
 *	Prefix the gopher selector in "xselector", in place, with
 *
 *	    NDGU_MARK [flags] gopher://host:iport/<gtype>
 *
 *	where flags are added by printFlags() when DELEGATE_FLAGS is not
 *	empty and <gtype> is a single character, '1' when "gtype" is 0.
 *	Returns "xselector".  (The function used to fall off its end
 *	although it is declared to return a pointer.)
 *	"iport" is printed with %d and is therefore declared as an int; it
 *	was declared as a "char *" before, which does not match the format.
 *	NOTE(review): the selector is staged in 1024 byte buffers without a
 *	length check -- confirm the limit the callers guarantee.
 */
char *delegate_selector(Conn,xselector,host,iport,gtype)
	Connection *Conn;
	char *xselector,*host;
	int iport,gtype;
{	char dgopher[1024];
	char tmp[1024];
	char *dp;

	/* never use the marker itself as a format string */
	dp = Sprintf(dgopher,"%s",NDGU_MARK);
	if( DELEGATE_FLAGS[0] )
		dp = printFlags(Conn,dp);

	Sprintf(dp,"gopher://%s:%d/%c",host,iport,gtype?gtype:'1');

	/* xselector = prefix + old xselector */
	strcpy(tmp,xselector);
	sprintf(xselector,"%s%s",dgopher,tmp);
	return xselector;
}

/*
 *	Split a URL of a scheme accepted by localPathProto() into its
 *	scheme, login (host) and path parts.
 *	"proto" and "login" receive the scheme and the host when they are
 *	not NULL; both are set to "" first.  With "scheme:///path" the
 *	login becomes "localhost".
 *	Returns a pointer into "url" where the path starts (right after
 *	"scheme:" or after "scheme://host"), or NULL when "url" has no ':',
 *	when the scheme is rejected by localPathProto(), or when the scheme
 *	name is not directly followed by ':'.
 */
char *file_hostpath(url,proto,login)
	char *url,*proto,*login;
{	char scheme_tmp[128],login_tmp[128];
	char *rest;

	if( strchr(url,':') == NULL )
		return NULL;

	/* private buffers stand in for outputs the caller does not want */
	if( proto == NULL )
		proto = scheme_tmp;
	proto[0] = 0;

	if( login == NULL )
		login = login_tmp;
	login[0] = 0;

	sscanf(url,"%[a-zA-Z0-9]",proto);
	if( !localPathProto(proto) )
		return NULL;

	rest = &url[strlen(proto)];
	if( *rest != ':' )
		return NULL;
	rest++;

	/* no "//": the path follows the colon immediately */
	if( rest[0] != '/' || rest[1] != '/' )
		return rest;
	rest += 2;

	/* empty host */
	if( *rest == '/' ){
		strcpy(login,"localhost");
		return rest;
	}

	sscanf(rest,"%[^/]",login);
	return rest + strlen(login);
}



/*
 *	SCAN A URL AND EXPAND IT TO A delegated-URL
 */

#include "SLL.h"
extern SLL_putval();
extern SLLRule URL[];

/*
 *	Parse the URL at the head of "url" and write its delegated form,
 *	relative to the delegate dproto://dhost:dport/dpath, into "rurl".
 *	Returns the number of bytes of "url" that "rurl" replaces, or 0
 *	when "url" is to be left as it is (reserve_url() says so, the text
 *	is not parsed as a URL, or delegate_url() declines); "rurl" is set
 *	to "" before parsing.
 *	A URL prefixed with "!-_-" is an escape: the text after the prefix
 *	is copied to "rurl" unchanged and the length of the whole "url"
 *	string is returned.
 *	"dpath" may be NULL, which is treated as an empty path.
 */
CTX_url_rurl(ctx,url,rurl,dproto,dhost,dport,dpath,dgrelay)
	void *ctx;
	char *url,*rurl,*dproto,*dhost,*dpath;
	int dport;
	char *dgrelay;
{	char *nurl;
	char values[URLSZ];
	char hostport[256];
	char *vp;
	int len;

	if( strncmp(url,"!-_-",4) == 0 ){
		strcpy(rurl,url+4);
		return strlen(url);
	}

	if( reserve_url(ctx) )
		return 0;

	nurl = url;
	vp = values;
	values[0] = 0;

	*rurl = 0;
	if( SLLparse(0,URL,url,&nurl,SLL_putval,vp,URLSZ,&vp) != 0 )
		return 0;
	len = nurl - url;

	/* add the description of the delegate to the parsed attributes */
	if( dproto && dproto[0] )
		vp = Sprintf(vp,"dproto=%s\n",dproto);

	if( dhost && dhost[0] ){
		/* dproto and dhost are checked for NULL, so be as
		 * tolerant with dpath instead of dereferencing it blindly
		 */
		if( dpath == NULL )
			dpath = "";

		if( dproto && dproto[0] )
			HostPort(hostport,dproto,dhost,dport);
		else	sprintf(hostport,"%s:%d",dhost,dport);

		/* supply the '/' between host:port and a relative path */
		if( *dpath != 0 && *dpath != '/' )
			vp = Sprintf(vp,"delegate=%s/%s\n",hostport,dpath);
		else	vp = Sprintf(vp,"delegate=%s%s\n",hostport,dpath);
	}

	if( delegate_url(ctx,rurl,values,url,len,dgrelay) == 0 )
		return 0;
	return len;
}

/*
 *	Copy "line" to "xline", shortening full URLs which point to this
 *	server itself into the partial form "/path[?search]".
 *	A URL is regarded as pointing to this server when its port (the
 *	explicit one, or serviceport(proto) when omitted), its protocol
 *	and its host match referer->r_my.
 *	References for which html_nextTagAttr() reports URICONV_FULL are
 *	left as they are.  Returns the number of URLs shortened.
 *	No bound of "xline" is checked.
 */
url_partializeS(referer,line,xline)
	Referer *referer;
	char *line,*xline;
{	char *myproto,*myhost;
	int myport;
	char *sp,*np,*xp;
	char *nurl;
	char values[URLSZ],*av[64],*vp,*proto,*host,*port,*path,*search;
	int porti;
	int len;
	int nmod;
	int umask;

	myproto = referer->r_my.u_proto;
	myhost = referer->r_my.u_host;
	myport = referer->r_my.u_port;

	sp = line;
	xp = xline;
	nmod = 0;

	for(;;){
		umask = URICONV_PARTIAL;
		np = html_nextTagAttr(sp,"",NULL,NULL,NULL,&umask);
		if( np == NULL )
			break;

		/* flush the text in front of the reference */
		len = np - sp;
		bcopy(sp,xp,len);
		xp[len] = 0;
		xp += len;
		sp = np;

		/* Start every parse from a defined state, in the same way
		 * as CTX_url_rurl() and scan_url1() do: the end pointer at
		 * the head of the URL and an empty value buffer.
		 */
		nurl = np;
		vp = values;
		values[0] = 0;

		if( umask & URICONV_FULL ){
			/* conflicting, adopt FULL prior to PARTIAL ... */
		}else
		if( SLLparse(0,URL,np,&nurl,SLL_putval,vp,URLSZ,&vp) == 0 ){
			stoV(values,64,av,'\n');
			if( proto = getv(av,"proto") )
			if( host  = getv(av,"host" ) ){
				if( port = getv(av,"port") )
					porti = atoi(port);
				else	porti = serviceport(proto);
				path = getv(av,"path");
				search = getv(av,"search");

				if( porti == myport )
				if( strcaseeq(proto,myproto) )
				if( hostcmp_lexical(host,myhost,1) == 0 ){
					/* replace the whole URL with
					 * "/" path [ "?" search ]
					 */
					sp += nurl - np;
					*xp++ = '/';
					if( path )
						strcpy(xp,path);
					else	*xp = 0;
					if( search ){
						xp += strlen(xp);
						*xp++ = '?';
						strcpy(xp,search);
					}
					nmod++;
				}
			}
		}
		xp += strlen(xp);
	}
	strcpy(xp,sp);
	return nmod;
}

/*
 *	SCAN A URL-EXTENSION
 */
extern SLLRule URLX[];

/*
 *	Print "t=<the first l bytes of n>" and a newline to the standard
 *	output.  "n" need not be NUL terminated within "l" bytes; output
 *	stops at a NUL found earlier.  A negative "l" prints an empty
 *	value.  "vb" is not used.
 *	The value is printed straight from "n" with a precision, so there
 *	is no intermediate buffer to overflow (a 1024 byte one used to be
 *	overrun for l >= 1024).
 */
int putv(t,n,l,vb)
	char *t,*n,*vb;
	int l;
{
	if( l < 0 )
		l = 0;
	printf("%s=%.*s\n",t,l,n);
}

/*
 *	Parse the URL at the head of "url" with the URL grammar, leaving
 *	what SLL_putval() collects in "values" (an area of URLSZ bytes,
 *	emptied first).
 *	Returns the length of the parsed URL, or 0 when parsing fails.
 */
scan_url1(url,values)
	char *url;
	char *values;
{	char *endp,*outp;

	endp = url;
	outp = values;
	*values = 0;

	if( SLLparse(0,URL,url,&endp, SLL_putval,outp,URLSZ,&outp ) != 0 )
		return 0;
	return endp - url;
}
/*
 *	Parse the URL-extension at the head of "urlx" with the URLX
 *	grammar, leaving what SLL_putval() collects in "values" (an area
 *	of URLSZ bytes, emptied first).
 *	Returns the length of the parsed text, or 0 when parsing fails.
 */
scan_urlx(urlx,values)
	char *urlx;
	char *values;
{	char *endp,*outp;

	endp = urlx;
	outp = values;
	*values = 0;

	if( SLLparse(0,URLX,urlx,&endp, SLL_putval,outp,URLSZ,&outp ) != 0 )
		return 0;
	return endp - urlx;
}


/*
 *	URL SYNTAX TABLES FOR SLL LIBRARY
 */

/* Character sets referred to by the rows flagged CHARSET in the grammar
 * tables below.
 */

/* decimal digits */
static char DIGIT[] = "0123456789";
/* lower and upper case letters */
static char ALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
";

/* letters and digits */
static char ALPHADIGIT[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
";
/* defined in the SLL library; used as the gate of the "terminate" row
 * of DOMLABEL2
 */
extern char SLL_OTHERWISE[];
#define OTHERWISE SLL_OTHERWISE

/*
static char NALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_.&+\
!*'();, \
";
*/
/* letters, digits and $-_.&+!*'();, -- unlike the disabled version
 * above, the space character is not included
 */
static char NALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_.&+\
!*'();,\
";

/* NALPHA plus ':', space and '%' */
static char XALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_.&+\
!*'():;, %\
";

/* XALPHA plus '@' and '~'; the set used for path components (PATH1) */
static char YALPHA[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
$-_@.&+\
!~*'():;, %\
";

/* "|" is not in "uric" in RFC2396 but usually used in CGI-Counter for ex. */
static char URIC[] = "\
abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789\
;/?:@&=+$,\
-_.!~*'()\
%\
|\
";

/*
 * URL grammar for the SLL parser.  Each table is opened by ALT(name) or
 * SEQ(name), closed by END, and holds rows of
 *	{ label, gate, next-rule [, flags] }
 * NOTE(review): ISRULE/ALT/SEQ/END, the gates IMM and OTHERWISE, the
 * targets NEXT and SUCCESS and the flags (IGNCASE, PUTGATE, PUTVAL,
 * OPTIONAL, CHARSET) are defined in SLL.h, which is not visible here.
 * Presumably ALT tries its rows as alternatives, SEQ matches its rows in
 * order, and the label of a PUTVAL/PUTGATE row becomes a NAME in the
 * NAME=VALUE output read back with getv() -- confirm against SLL.h.
 */

/* declarations of rules that are referred to before they are defined */
ISRULE( URL	);
ISRULE( HTTP	);
ISRULE( GOPHER	);
ISRULE( FTP	);
ISRULE( FILEP	);
ISRULE( NEWS	);
ISRULE( NNTP	);
ISRULE( WAIS	);

/*
ISRULE( AFS	);
ISRULE( MAILTO	);
ISRULE( TELNET	);
ISRULE( GENERIC);
*/

ISRULE( HOSTPORT);
ISRULE( PATH);
ISRULE( SEARCH);

/* top level rule: a scheme name selects the rule for the rest of the URL;
 * the scheme is reported as "proto"
 */
ALT(URL)
	{ "proto",	"https",	HTTP,		IGNCASE|PUTGATE}, /* must be before http */
	{ "proto",	"http",		HTTP,		IGNCASE|PUTGATE},
	{ "proto",	"gopher",	GOPHER,		IGNCASE|PUTGATE},
	{ "proto",	"ftp",		FTP,		IGNCASE|PUTGATE},
	{ "proto",	"file",		FILEP,		IGNCASE|PUTGATE},
	{ "proto",	"news",		NEWS,		IGNCASE|PUTGATE},
	{ "proto",	"nntp",		NNTP,		IGNCASE|PUTGATE},
	{ "proto",	"wais",		WAIS,		IGNCASE|PUTGATE},
/*
	{ "proto",	"afs://",	AFS,		IGNCASE|PUTGATE},
	{ "proto",	"mailto::",	MAILTO,		IGNCASE|PUTGATE},
	{ "proto",	"telnet:",	TELNET,		IGNCASE|PUTGATE},
	{ "proto",	IMM,		GENERIC,	IGNCASE|PUTGATE},
*/
END

/* "://" hostport [ "/" path ] [ "?" search ]; shared by http and https */
SEQ(HTTP)
	{ "://",	"://",		NEXT		},
	{ "hostport",	IMM,		HOSTPORT,	PUTVAL},
	{ "path",	"/",		PATH,		OPTIONAL|PUTVAL},
	{ "search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END

/* declarations of the lexical rules defined further below */
ISRULE( IALPHA );
ISRULE( DIGITS );
ISRULE( ALPHAS );
ISRULE( NALPHAS);
ISRULE( XALPHAS);
ISRULE( YALPHAS);
ISRULE( DOMLABEL);

/* a domain label, optionally followed by "." and another HOSTNAME */
SEQ(HOSTNAME)
	{ "name",	IMM,		DOMLABEL	},
	{ "name",	".",		HOSTNAME,	OPTIONAL},
END
/* four groups of digits separated by "." */
SEQ(HOSTNUMBER)
	{ "num1",	IMM,		DIGITS		},
	{ "num2",	".",		DIGITS		},
	{ "num3",	".",		DIGITS		},
	{ "num4",	".",		DIGITS		},
END
/* try HOSTNUMBER first, not to let 123.123.123.123 be matched with HOSTNAME */
ALT(HOST)
	{ "number",	IMM,		HOSTNUMBER	},
	{ "name",	IMM,		HOSTNAME	},
END
SEQ(PORT)
	{ "number",	IMM,		DIGITS		},
END
/* host [ ":" port ], reported as "host" and "port" */
SEQ(HOSTPORT)
	{ "host",	IMM,		HOST,		PUTVAL},
	{ "port",	":",		PORT,		OPTIONAL|PUTVAL},
END

/* rest of a domain label: more letters/digits, or a "-" which must be
 * followed by a DOMLABEL again, or the end of the label on any other input
 */
ALT(DOMLABEL2)
	{ "alphadigit",	ALPHADIGIT,	DOMLABEL2,	CHARSET},
	{ "hyphen",	"-",		DOMLABEL	},
	{ "terminate",	OTHERWISE,	SUCCESS		},
END
/* a domain label starts with a letter or a digit */
SEQ(DOMLABEL)
	{ "alphadigit",	ALPHADIGIT,	DOMLABEL2,	CHARSET},
END

/* a letter, optionally followed by NALPHA characters */
SEQ(IALPHA)
	{ "alpha",	ALPHA,		NEXT,		CHARSET	},
	{ "xalphas",	IMM,		NALPHAS,	OPTIONAL},
END

/*
SEQ(SEARCH1)
	{ "search",	IMM,		XALPHAS,	},
	{ "search",	"+",		SEARCH,		OPTIONAL},
END
SEQ(SEARCH)
	{ "search",	IMM,		SEARCH1,	OPTIONAL},
END
*/
/* The rules below named <set>S are built alike: one character of the
 * set, optionally followed by the same rule again.
 */

/* characters of URIC */
SEQ(URICS)
	{ "uric",	URIC,		NEXT,		CHARSET},
	{ "uric",	IMM,		URICS,		OPTIONAL},
END
/* the search part is an optional run of URIC characters */
SEQ(SEARCH)
	{ "search",	IMM,		URICS,		OPTIONAL},
END

SEQ(ALPHAS)
	{ "alpha",	ALPHA,		NEXT,		CHARSET},
	{ "alpha",	IMM,		ALPHAS,		OPTIONAL},
END
SEQ(NALPHAS)
	{ "nalpha",	NALPHA,		NEXT,		CHARSET},
	{ "nalpha",	IMM,		NALPHAS,	OPTIONAL},
END
SEQ(XALPHAS)
	{ "xalpha",	XALPHA,		NEXT,		CHARSET},
	{ "xalpha",	IMM,		XALPHAS,	OPTIONAL},
END
SEQ(YALPHAS)
	{ "yalpha",	YALPHA,		NEXT,		CHARSET},
	{ "yalpha",	IMM,		YALPHAS,	OPTIONAL},
END

/* an optional name of YALPHA characters, optionally followed by "/" and
 * a further PATH
 */
SEQ(PATH1)
	{ "name",	IMM,		YALPHAS,	OPTIONAL},
	{ "dir",	"/",		PATH,		OPTIONAL},
END

/* a path, which may be empty ("nullpath") */
ALT(PATH)
	{ "path",	IMM,		PATH1		},
	{ "nullpath",	IMM,		SUCCESS		},
END


/* user [ ":" pass ] "@" */
SEQ(USERPASS)
	{ "user",	IMM,		XALPHAS,	PUTVAL},
	{ "pass",	":",		XALPHAS,	OPTIONAL|PUTVAL},
	{ "@",		"@",		SUCCESS		},
END
/* [ userpass ] hostport */
SEQ(LOGIN)
	{ "userpass",	IMM,		USERPASS,	OPTIONAL|PUTVAL},
	{ "hostport",	IMM,		HOSTPORT,	PUTVAL},
END
/* "://" login [ "/" path ] */
SEQ(FTP)
	{ "login",	"://",		LOGIN,		PUTVAL	},
	{ "path",	"/",		PATH,		OPTIONAL|PUTVAL},
END

/* [ host ] [ "/" path ] as found after "file://" */
SEQ(FILEH)
	{ "host",	IMM,		HOST,		OPTIONAL|PUTVAL	},
	{ "path",	"/",		PATH,		OPTIONAL|PUTVAL	},
END
/* file URL: either "://" followed by FILEH, or ":" followed by a path */
ALT(FILEP)
	{ "file",	"://",		FILEH		},
	{ "path",	":",		PATH,		PUTVAL	},
END

/* NOTE(review): the "." row leads back to GROUP1 itself and not to
 * GROUP, so as written it accepts only further "."s after a dot; since
 * NALPHA contains '.', IALPHA already covers dotted group names --
 * confirm the intent.
 */
ALT(GROUP1)
	{"name",	".",		GROUP1		},
	{"name",	IMM,		SUCCESS		},
END
/* a newsgroup name: IALPHA optionally followed by GROUP1 */
SEQ(GROUP)
	{"name",	IMM,		IALPHA		},
	{"name",	IMM,		GROUP1,		OPTIONAL},
END
/* an article identifier: serial "@" host */
SEQ(ARTICLE)
	{"serial",	IMM,		XALPHAS		},
	{"domain",	"@",		HOST		},
END
/* a group is tried before an article */
ALT(GROUPART)
	{"group",	IMM,		GROUP,		PUTVAL	},
	{"article",	IMM,		ARTICLE,	PUTVAL	},
END
/* news ":" ( group | article ), without a host part */
SEQ(NEWS)
	{"groupart",	":",		GROUPART,	PUTVAL	},
END
/* nntp "://" hostport "/" group [ "?" search ] */
SEQ(NNTP)
	{"hostport",	"://",		HOSTPORT,	PUTVAL	},
	{"group",	"/",		GROUP,		PUTVAL	},
	{"search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END

SEQ(DATABASE)
	{"database",	IMM,		XALPHAS,	},
END
/* wais "://" hostport "/" database [ "?" search ] */
SEQ(WAIS)
	{"hostport",	"://",		HOSTPORT,	PUTVAL	},
	{"database",	"/",		DATABASE,	PUTVAL	},
	{"search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END


/* a gopher selector has the syntax of PATH */
ALT(SELECTOR)
	{ "selector",	IMM,		PATH,		},
END

/* the gopher type: one digit, or nothing ("nullgtype") */
ALT(GTYPE)
	{ "gtype",	DIGIT,		SUCCESS,	CHARSET},
	{ "nullgtype",	IMM,		SUCCESS		},
END

/* gtype followed by an optional selector */
SEQ(GSELECTOR)
	{ "gtype",	IMM,		GTYPE,		PUTVAL},
	{ "selector",	IMM,		SELECTOR,	OPTIONAL|PUTVAL},
END

/* gopher "://" hostport [ "/" gtype selector ] [ "?" search ];
 * the part after "/" is reported as "path"
 */
SEQ(GOPHER)
	{ "//",		"://",		NEXT		},
	{ "hostport",	IMM,		HOSTPORT,	PUTVAL},
	{ "path",	"/",		GSELECTOR,	OPTIONAL|PUTVAL},
	{ "search",	"?",		SEARCH,		OPTIONAL|PUTVAL},
END


/* rest of a digit string: more digits, or the end on anything else */
ALT(DIGITS1)
	{ "digit",	DIGIT,		DIGITS1,	CHARSET	},
	{ "nondigit",	IMM,		SUCCESS		},
END
/* one or more digits */
ALT(DIGITS)
	{ "digit",	DIGIT,		DIGITS1,	CHARSET	},
END

/*
 *
 */
/* Grammar of the URL-extension parsed by scan_urlx().
 * The flags are a run of letters enclosed in one of four bracketings:
 * "=...=", "+...=", "(...)" and "@...@".
 */
SEQ(FLAGS1)
	{ "flags",	"=",		ALPHAS,		},
	{ "eoflags",	"=",		SUCCESS		},
END
SEQ(FLAGS2)
	{ "flags",	"+",		ALPHAS,		},
	{ "eoflags",	"=",		SUCCESS		},
END
SEQ(FLAGS3)
	{ "flags",	"(",		ALPHAS,		},
	{ "eoflags",	")",		SUCCESS		},
END
SEQ(FLAGS4)
	{ "flags",	"@",		ALPHAS,		},
	{ "eoflags",	"@",		SUCCESS		},
END
/* any one of the four bracketings, tried in this order */
ALT(FLAGS)
	{ "f1",		IMM,		FLAGS1		},
	{ "f2",		IMM,		FLAGS2		},
	{ "f3",		IMM,		FLAGS3		},
	{ "f4",		IMM,		FLAGS4		},
END
/* [ flags ] [ proto ] ":" hostport [ "=" gtype ], reported as "xflags",
 * "xproto", "xhostport" and "xgtype"
 */
SEQ(URLX)
	{ "xflags",	IMM,		FLAGS,		PUTVAL|OPTIONAL},
	{ "xproto",	IMM,		ALPHAS,		PUTVAL|OPTIONAL},
	{ "xhostport",	":",		HOSTPORT,	PUTVAL},
	{ "xgtype",	"=",		DIGITS,		PUTVAL|OPTIONAL},
END

/*
ISRULE(ROUTE);
ISRULE(HOSTLIST);

SEQ(ROUTE)
	{ "proto",	IMM,		ALPHAS,		PUTVAL},
	{ "host",	"://"		HOST,		PURVAL},
	{ "port",	":"		PORT,		PURVAL},
	{ "dstlist",	":"		HOSTLIST,	PURVAL},
	{ "dstlist",	":"		HOSTLIST,	PURVAL|OPTIONAL},
END

SEQ(HOSTLIST)
	{ "host",	IMM,		HOST,
END
*/
