;;; 82e-conv.el --- convert from UTF-8 to GNU Emacs
;;; :vi:se ai lisp:

;; Copyright (C) 1999 IIDA Yosiaki

;; Author: IIDA,Yosiaki <iida@secom-sis.co.jp>
;; Maintainer: IIDA,Yosiaki <iida@secom-sis.co.jp>
;; Created: 1999-06-11
;; Version: -0.9
;; Keywords: Conversion

;; This file is NOT part of GNU Emacs.

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;;	Table Type:      Unicode to NEmacs Mule table
;;	Table format:    Emacs Lisp alist
;;
;;	General notes:
;;
;;	The table contains the data on how Unicode characters
;;	map into NEmacs Mule internal codes.
;;
;;	Format:  Alist
;;	         Car is UTF-8 (in printed downcase hexadecimal)
;;	         Cdr is NEmacs Mule internal code

;;; Change log:

;;; Code:

;; Make sure the conversion table `82m-tab' is available before any of
;; the converters below run.
(if (and (boundp '82m-tab) 82m-tab)
    ;; Table already present: yield T so that evaluating this form by
    ;; hand shows a compact value rather than the table itself.
    t
  ;; Hmm, no tables...  Load the file that is expected to define it.
  (load "82m-tab"))

(defun 82m-convert-a-hexprintedutf8-to-string (hex-printed-utf8)
  "Look up HEX-PRINTED-UTF8 in `82m-tab' and return the associated value.
HEX-PRINTED-UTF8 must be a hexadecimal-digit string for a single
UTF-8 character.  Keys are compared with `assoc'; the result is
nil when the table has no entry for HEX-PRINTED-UTF8."
  (let ((entry (assoc hex-printed-utf8 82m-tab)))
    ;; `cdr' of nil is nil, so a missing entry simply yields nil.
    (cdr entry)))

(defun 82e-convert-hexprintedutf8s-to-string (hex-printed-utf8s)
  "Convert from HEX-PRINTED-UTF8S to GNU Emacs internal code and return.
HEX-PRINTED-UTF8S must be a hexadecimal-digit string for
multiple UTF-8 characters.  It is downcased before processing.

The first hex digit of each character decides how many digits are
consumed:
  0-7   two digits; converted directly with `char-to-string'.
  c, d  four digits; looked up in the conversion table.
  e     six digits; looked up in the conversion table.
  other two digits; copied to the result unchanged.
A sequence that has no table entry is copied to the result
unchanged.  If the input ends in the middle of a sequence, the
remaining digits are likewise copied unchanged instead of
signalling an `args-out-of-range' error.

The pieces are concatenated and passed through `string-as-multibyte'."
  (setq hex-printed-utf8s (downcase hex-printed-utf8s))
  (let ((total (length hex-printed-utf8s))
        (pos 0)
        pieces)
    (while (< pos total)
      (let* ((top (aref hex-printed-utf8s pos))
             ;; Number of hex digits of the sequence starting at POS,
             ;; or nil when the lead digit is not one we can decode.
             (utf8-len (cond ((= top ?e) 6)
                             ((= top ?c) 4)
                             ((= top ?d) 4)
                             ((< top ?8) 2)))
             ;; Undecodable lead digits are skipped one byte (two
             ;; hex digits) at a time.
             (want (or utf8-len 2))
             ;; Never slice past the end of the input.
             (stop (min total (+ pos want)))
             (subs (substring hex-printed-utf8s pos stop))
             (head (cond
                    ;; Truncated tail: keep what is left as it is.
                    ((< (- stop pos) want) subs)
                    ;; Unknown lead digit: keep the raw hex digits.
                    ((null utf8-len) subs)
                    ;; Single-byte character: decode the hex value.
                    ((= utf8-len 2)
                     (char-to-string (string-to-number subs 16)))
                    ;; Multi-byte character: use the table, falling
                    ;; back to the raw hex digits when it has no entry.
                    (t (or (82m-convert-a-hexprintedutf8-to-string subs)
                           subs)))))
        ;; Collect in reverse; appending to the tail on every
        ;; iteration would be quadratic in the number of pieces.
        (setq pieces (cons head pieces)
              pos stop)))
    (string-as-multibyte (apply (function concat) (nreverse pieces)))))

(defun 82e-convert-universal-to-string (universal)
  "Convert the hexadecimal UTF-8 digits found in UNIVERSAL and return a string.
UNIVERSAL must be a string containing hexadecimal-digit substrings for
UTF-8 characters.  Every character is downcased; characters that are
not hexadecimal digits are dropped, and what remains is handed to
`82e-convert-hexprintedutf8s-to-string'.
When called interactively, the result is also shown in the echo area."
  (interactive "sHexadecimal UTF-8 to convert: ")
  (let* ((keep-hex-digit
          ;; Map one character to its downcased one-character string,
          ;; or to the empty string when it is not a hex digit.
          (function
           (lambda (ch)
             (let ((low (char-to-string (downcase ch))))
               (if (string-match "[0-9a-f]" low)
                   low
                 "")))))
         (hex-only (mapconcat keep-hex-digit universal ""))
         (string (82e-convert-hexprintedutf8s-to-string hex-only)))
    (when (interactive-p)
      (message "%s" string))
    string))

(defun 82e-convert-hexprintedutf8s-region (beg end)
  "Replace the region from BEG to END with its converted form.
Region must be a hexadecimal-digit string for multiple UTF-8
characters.  The region text is passed to
`82e-convert-hexprintedutf8s-to-string' and the result is inserted
in its place."
  (interactive "*r")
  (save-excursion
    (save-restriction
      ;; Work on the region alone so that `buffer-string' and the
      ;; accessible bounds refer to exactly BEG..END.
      (narrow-to-region beg end)
      (let ((converted
             (82e-convert-hexprintedutf8s-to-string (buffer-string))))
        (delete-region (point-min) (point-max))
        (insert converted)))))

(defun 82e-convert-universal-region (beg end)
  "Convert the hexadecimal UTF-8 digits in the region from BEG to END.
Region must contain hexadecimal-digit substrings for UTF-8 character.
Every character that is not a hexadecimal digit (of either case) is
deleted first; the remaining digits are then converted in place by
`82e-convert-hexprintedutf8s-region'."
  (interactive "*r")
  (save-restriction
    (narrow-to-region beg end)
    (goto-char (point-min))
    ;; List both cases explicitly and switch case folding off, so that
    ;; uppercase digits survive no matter what `case-fold-search' is in
    ;; this buffer.  The converter downcases the digits itself.
    (let ((case-fold-search nil))
      (while (re-search-forward "[^0-9a-fA-F]+" nil t)
        (delete-region (match-beginning 0) (match-end 0))))
    ;; Leave point at the end of the digits, as a full forward scan
    ;; over the region would.
    (goto-char (point-max))
    (82e-convert-hexprintedutf8s-region (point-min) (point-max))))

(defun 82e-convert-regexp-region (beg end regexp)
  "Convert every UTF-8 part matched by REGEXP in the region from BEG to END.
The first parenthesized group of REGEXP delimits the text handed to
`82e-convert-universal-region' for each match."
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)
      (goto-char (point-min))
      (while (re-search-forward regexp nil t)
        ;; Capture both bounds of group 1 before the conversion
        ;; modifies the buffer.
        (let ((part-beg (match-beginning 1))
              (part-end (match-end 1)))
          (82e-convert-universal-region part-beg part-end))))))

;; Matches the literal text "[UNIVERSAL 12] '", then one or more
;; hexadecimal digits (group 1), then the literal "'H".  Group 1 is the
;; part that `82e-convert-regexp-region' converts.
(defvar 82e-convert-pp-regexp "\\[UNIVERSAL 12\\] '\\([0-9a-f][0-9a-f]*\\)'H"
  "Regexp to find a UTF-8 part in PP.EXE output.")

(defun 82e-convert-pp-region (beg end)
  "Convert the UTF-8 parts of PP.EXE output in the region from BEG to END.
The value of `82e-convert-pp-regexp' specifies the UTF-8 part in
region."
  (interactive "r")
  (let ((utf8-part-regexp 82e-convert-pp-regexp))
    (82e-convert-regexp-region beg end utf8-part-regexp)))

(defun 82e-convert-pp-buffer ()
  "Convert the UTF-8 parts of PP.EXE output in the accessible buffer.
The value of `82e-convert-pp-regexp' specifies the UTF-8 part in
buffer."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (82e-convert-pp-region start finish)))


;; Pieces of the regexp, in order:
;;   "[CNO]*"  any run of the letters C, N and O
;;   " = 0C"   literal text, followed by two hex digits and a space
;;   group 1   a four-hex-digit word, any number of further
;;             space-separated four-digit words, and optionally a
;;             trailing space-separated two-digit word
;; Group 1 is the part that `82e-convert-regexp-region' converts.
;; Each backslash at the end of a line continues the string literal
;; without inserting a newline into the regexp.
(defvar 82e-convert-trans-regexp
  "[CNO]* = 0C[0-9a-f][0-9a-f] \
\\([0-9a-f][0-9a-f][0-9a-f][0-9a-f]\
\\( [0-9a-f][0-9a-f][0-9a-f][0-9a-f]\\)*\
\\( [0-9a-f][0-9a-f]\\)?\\)"
  "Regexp to find a UTF-8 part in TRANS.EXE output.")

;; Group 1 covers "0C", the two hex digits after it and the following
;; space; that is the text `82e-delete-trans-tag-length-region' deletes.
(defvar 82e-delete-trans-tag-length-regexp
  "[CNO]* = \\(0C[0-9a-f][0-9a-f] \\)"
  "Regexp to find the tag and length of universal string in TRANS.EXE output.")

(defun 82e-delete-trans-tag-length-region (beg end)
  "Delete the tag and length of universal strings between BEG and END.
The value of `82e-delete-trans-tag-length-regexp' specifies the tag
and length part in region; the text of its first group is removed
for every match."
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)
      (goto-char (point-min))
      (while (re-search-forward 82e-delete-trans-tag-length-regexp nil t)
        (let ((tag-beg (match-beginning 1))
              (tag-end (match-end 1)))
          (delete-region tag-beg tag-end))))))

(defun 82e-convert-trans-region (beg end)
  "Convert the UTF-8 parts of TRANS.EXE output in the region from BEG to END.
The value of `82e-convert-trans-regexp' specifies the UTF-8 part in
region.  After the conversion, the tag and length fields are removed
with `82e-delete-trans-tag-length-region'."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)
      (let ((utf8-part-regexp 82e-convert-trans-regexp))
        (82e-convert-regexp-region beg end utf8-part-regexp))
      ;; The conversion changes the size of the text, so the second
      ;; pass uses the bounds of the narrowed buffer.
      (let ((start (point-min))
            (finish (point-max)))
        (82e-delete-trans-tag-length-region start finish)))))

(defun 82e-convert-trans-buffer ()
  "Convert the UTF-8 parts of TRANS.EXE output in the accessible buffer.
The value of `82e-convert-trans-regexp' specifies the UTF-8 part in
buffer."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (82e-convert-trans-region start finish)))

;; Announce the feature so that (require '82e-conv) finds it loaded.
(provide '82e-conv)

;;; 82e-conv.el ends here.
