;;; -*- mode: lisp; coding: utf-8 -*-
;;; 
;;; Copyright (c) 2008 Masayuki Onjo <onjo@lispuser.net>
;;; 
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; 
;;;   * Redistributions of source code must retain the above copyright
;;;     notice, this list of conditions and the following disclaimer.
;;; 
;;;   * Redistributions in binary form must reproduce the above
;;;     copyright notice, this list of conditions and the following
;;;     disclaimer in the documentation and/or other materials
;;;     provided with the distribution.
;;; 
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

;;;
;;; Gauche's guessing character encoding
;;;
;;; Copyright (c) 2000-2007 Shiro Kawai  <shiro@acm.org>
;;;

(in-package :jp)

(defun guess (vector &optional (scheme :JP))
  (case scheme
    ((:*JP :JP) (guess-jp vector))
    (t          (error "scheme parameter: supported :*JP only"))))

(defun guess-jp (buffer &aux (len (length buffer)))
  (declare (optimize (speed 3) (space 0) (safety 0) (debug 0)))
  (macrolet ((dfa-init (dfa-st dfa-ar)
	       `(vector ,dfa-st ,dfa-ar 0 1.0d0))
	     (score (dfa)  `(svref ,dfa 3))
	     (state (dfa)  `(svref ,dfa 2))
	     (arcs (dfa)   `(svref ,dfa 1))
	     (states (dfa) `(svref ,dfa 0))
	     (dfa-alive (dfa) `(>= (the fixnum (state ,dfa)) (the fixnum 0)))
	     (dfa-next (dfa ch)
	       `(when (dfa-alive ,dfa)
		  (when (>= (the fixnum (state ,dfa)) (the fixnum 0))
		    (let ((temp (svref
				 (svref (states ,dfa) (state ,dfa))
				 ,ch)))
		      (if (< (the fixnum temp) (the fixnum  0))
			  (setf (state ,dfa) -1)
			  (setf (state ,dfa) (the fixnum (car (svref (arcs ,dfa) temp)))
				(score ,dfa) (* (the double-float (score ,dfa))
						(the double-float (cdr (svref (arcs ,dfa) temp))))))))))
	     ;; utility
	     (process-dfa (dfa ch value &rest others)
	       `(when (dfa-alive ,dfa)
		  (when (and ,@(mapcar (lambda (dfa) `(not (dfa-alive ,dfa))) others))
		    (return-from guess-body ,value))
		  (dfa-next ,dfa ,ch)))
	     ;; result
	     (iso-2022-jp () :jis)
	     (euc-jp ()      :euc-jp)
	     (shiftjis ()    :sjis)
	     (utf-8 ()       :utf8))
    (block guess-body
       (let* ((eucj (dfa-init +eucj-st+ +eucj-ar+))
	      (sjis (dfa-init +sjis-st+ +sjis-ar+))
	      (utf8 (dfa-init +utf8-st+ +utf8-ar+))
	      (top  nil))
	 (declare (dynamic-extent eucj sjis utf8 top))
	 (loop for c of-type fixnum across buffer
	       for i of-type fixnum from 0 do
	      (when (and (= (the fixnum c) (the fixnum #x1b)) (< i len))
		 (let ((c (aref buffer (the fixnum (1+ i)))))
		   (when (or (= (the fixnum c) (the fixnum #x24))  ; $
			     (= (the fixnum c) (the fixnum #x28))) ; (
		     (return-from guess-body (iso-2022-jp)))))
	       (process-dfa eucj c (euc-jp)    sjis utf8)
	       (process-dfa sjis c (shiftjis)  eucj utf8)
	       (process-dfa utf8 c (utf-8)     sjis eucj)
               (when (and (not (dfa-alive eucj)) (not (dfa-alive sjis)) (not (dfa-alive utf8)))
		 (return nil)))
	 ;; pick highest score
	 (when (dfa-alive eucj)
	   (setf top eucj))
	 (when (dfa-alive utf8)
	   (if top
	       (when (<= (the double-float (score top)) (the double-float (score utf8)))
		 (setf top utf8))
	       (setf top utf8)))
	 (when (dfa-alive sjis)
	   (if top
	       (when (< (the double-float (score top)) (the double-float (score sjis)))
		 (setf top sjis))
	       (setf top sjis)))
	 (cond ((eq top eucj) (euc-jp))
	       ((eq top utf8) (utf-8))
	       ((eq top sjis) (shiftjis))
	       (t             nil))))))
