#! /usr/bin/perl
#
# Samba Multibyte convert tool
# copyright (c) 2000, Hiroshi MIURA 
# All rights reserved by Hiroshi MIURA
# 
# Changelog
#   * Aug. 7, 2000 Ver.1.7 Hiroshi MIURA
#     - change command file name 'hextoeuc' to 'smbfnconv'
#       to figure out its functionality.
#     - added script to install
#
#   * Jul. 31, 2000 Ver.1.6 Hiroshi MIURA
#     - add -a option
#     - tuning find oparation.
# 
#   * Jul. 31, 2000 Ver.1.5 Hiroshi MIURA
#     - fix overflow problem in rmdir.
#
#   * Jul. 30, 2000 Ver.1.4 Hiroshi MIURA
#     - change command line not to using find piping.
#       thats more smart to use.
#     - convert to GETA if invalid charcode exists.
#
#   * Jul. 30, 2000 Ver.1.3 Hiroshi MIURA
#     - add capability of the hextohex, sjistosjis,
#        captocap, jistojis, euctoeuc.
#       These are to fix the samba compativility problem.
#
#   * Jul. 23, 2000 Ver.1.2 Hiroshi MIURA
#     - stop rmdir action when -d option is added. 
#
#   * Jul. 21, 2000 Ver.1.1, Hiroshi MIURA
#     - add about directory handling
#     - be usable the -p option.
#
#   * Jul. 20, 2000 Ver 1.0, Hiroshi MIURA
#     - Add logic about SJIS regularion and the other 
#       Charset handling
#     - select function using which command name is used.
#     - regulation logic is implied by binary search method.
#
#   * Jun. ??, 2000 Ver 0.9, Hiroshi MIURA
#     - test release
#     - only convert HEX to EUC.
#       primitive implimentation 
#
# TODO:
#   - inplement UTF-8 and EUC with JIS X0212(so-called 3byte EUC)
#   - don't use 'find' command and DO use 'File::Find'
#
# usage: commandname [-l][-h][-p  </where/to/some/hardlink/path/>][-d] 
#            </where/to/conv> [ </the/other/to/conv> [ <another/to/conv> .... ]]
#
#   -p:  default path => same path with original.
#
# Available command name is....
#  euctohex, euctocap, euctojis, euctosjis
#  jistohex, jistocap, jistoeuc, jistosjis
#  captohex, captoeuc, captojis, captosjis
#  hextoeuc, hextocap, hextojis, hextosjis
#  sjistohex, sjistocap, sjistojis, sjistoeuc
#  sjistosjis, captocap, hextohex, euctoeuc, jistojis
#
# these're 25 patterns. :-`)
#
require 5.0003;
use Getopt::Std;
use File::stat;
# use File::Find;
use Errno;

# now, I use jcode.pl, does someone knows how to use Jcode.pm??
#
require "jcode.pl";

getopts("dhlp:a:");
&Show_Usage($comn) if ($opt_h);

# -----------------------------------------
# definition of important table and constants, regex.
#
@sjisconv= (
0xfa40, 0xfa49, 0xeeef,
0xfa4a, 0xfa53, 0x8754,
0xfa54, 0xfa54, 0x81ca,
0xfa55, 0xfa57, 0xeefa,
0xfa58, 0xfa58, 0x878a,
0xfa59, 0xfa59, 0x8782,
0xfa5a, 0xfa5a, 0x8784,
0xfa5b, 0xfa5b, 0x81e6,
0xfa5c, 0xfa7e, 0xed40,
0xfa80, 0xfa9b, 0xed63,
0xfa9c, 0xfafc, 0xed80,
0xfb40, 0xfb5b, 0xede1,
0xfb5c, 0xfb7e, 0xee40,
0xfb80, 0xfb9b, 0xee63,
0xfb9c, 0xfbfc, 0xee80,
0xfc40, 0xfc4b, 0xeee1,
);
$sjisconvlen=($#sjisconv+1)/3-1;

@sjisreg = (
0x8470, 0x847e, 0x8440,
0x8754, 0x8754, 0xfa4a,
0x8782, 0x8782, 0xfa59,
0x8784, 0x8784, 0xfa5a,
0x878a, 0x878a, 0xfa58,
0x8790, 0x8790, 0x81e0,
0x8791, 0x8791, 0x81df,
0x8792, 0x8792, 0x81e7,
0x8795, 0x8795, 0x81e3,
0x8796, 0x8796, 0x81db,
0x8797, 0x8797, 0x81da,
0x879a, 0x879a, 0x81e6,
0x879b, 0x879b, 0x81bf,
0x879c, 0x879c, 0x81be,
0xed40, 0xed62, 0xfa5c,
0xed63, 0xed7e, 0xfa80,
0xed80, 0xede0, 0xfa9c,
0xede1, 0xedfc, 0xfb40,
0xee40, 0xee62, 0xfb5c,
0xee63, 0xee7e, 0xfb80,
0xee80, 0xeee0, 0xfb9c,
0xeee1, 0xeeec, 0xfc40,
0xeeef, 0xeef8, 0xfa40, 
0xeef9, 0xeef9, 0x81ca,
0xfa54, 0xfa54, 0x81ca,
0xfa6b, 0xfa6b, 0x81e6
);
$sjisreglen=($#sjisreg+1)/3-1;
 
%match = (
 SJIS_C    => '[\x80-\x9f\xe0-\xfc][\x40-\xfc]',
 SJIS_KANA => '[\xa1-\xdf]',
 SJIS_O    => '[\x81-\xfc]',
 SJIS_S    => '[\x81-\x9f\xe0-\ec][\x40-\xfc]',
 SKOS_D    => '[\xed-\xfc][\x40-\xfc]'
);

$GETA=sprintf "%c%c",0x81,0xac;

#---------------------------------------------
# check command name which it called.
# 
if ($opt_a) {
    $comn=$opt_a;
} else {
    $comn=$0;
    $comn=~s|(.*/)*(.+)|$2|;
}

## who am I? :)
## definition of mb convert 

if     ($comn eq "euctohex"){
    $convmb= sub {
	local(*nname) = @_;
	my $name=$nname;
        jcode::euc2sjis(*nname);
        &sjis2rsjis(*nname);
        $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo;
        $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo; 
        ($name ne $nname);
    };

 }elsif ($comn eq "euctocap"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
        jcode::euc2sjis(*nname);
	&sjis2rsjis(*nname);
	$nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo;
	($name ne $nname);
      };

 }elsif ($comn eq "euctojis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::euc2jis(*nname);
        ($name ne $nname);
    };

 }elsif ($comn eq "euctosjis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::euc2jis(*nname);
        ($name ne $nname);
    };

 }elsif ($comn eq "hextoeuc"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	&sjis2csjis(*nname);
	return 0 if ($name eq $nname);
        jcode::sjis2euc(*nname);
    };

 }elsif ($comn eq "hextojis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	&sjis2csjis(*nname);
	return 0 if ($name eq $nname);
	jcode::sjis2jis(*nname);
    };

 }elsif (($comn eq "hextocap")||($comn eq "captocap")){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	return 0 if ($name eq $nname);
	&sjis2rsjis(*nname);
	$nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo;
	($name ne $nname);
    };

 }elsif ($comn eq "hextosjis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	&sjis2rsjis(*nname);
	($name ne $nname);
    };

 }elsif ($comn eq "captoeuc"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	&sjis2csjis(*nname);
	return 0 if ($name eq $nname);
	jcode::sjis2euc(*nname);
    };

 }elsif ($comn eq "captojis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	&sjis2csjis(*nname);
	return 0 if ($name eq $nname);
	jcode::sjis2jis(*nname);
    };

 }elsif ($comn eq "captosjis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	return 0 if ($name eq $nname);
	&sjis2rsjis(*nname);
    };

 }elsif (($comn eq "captohex")||($comn eq "hextohex")){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	$nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo;
	&sjis2rsjis(*nname);
	return 0 if ($name eq $nname);
        $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo;
        $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo; 
    };

 }elsif ($comn eq "jistocap"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::jis2jis(*nname);
	&sjis2rsjis(*nname);
	$nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo;
	($name ne $nname);
      };

 }elsif ($comn eq "jistohex"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::jis2sjis(*nname);
	&sjis2rsjis(*nname);
        $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo;
        $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo; 
       ($name ne $nname);
      };

 }elsif ($comn eq "jistoeuc"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::jis2euc(*nname);
        ($name ne $nname);
    };

 }elsif ($comn eq "jistosjis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::jis2sjis(*nname);
	&sjis2rsjis(*nname);
        ($name ne $nname);
    };

 }elsif ($comn eq "sjistocap"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	&sjis2rsjis(*nname);
	$nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo;
	($name ne $nname);
    };

 }elsif ($comn eq "sjistohex"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
        &sjis2rsjis(*nname);
        $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo;
        $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo;
        ($name ne $nname);
    };

 }elsif ($comn eq "sjistoeuc"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
        &sjis2csjis(*nname);
	jcode::sjis2euc(*nname);
        ($name ne $nname);
    };

 }elsif ($comn eq "sjistojis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
        &sjis2csjis(*nname);
	jcode::sjis2jis(*nname);
        ($name ne $nname);
    };

 }elsif ($comn eq "sjistosjis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	&sjis2rsjis(*nname);
	($name ne $nname);
    };

 }elsif ($comn eq "euctoeuc"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::euc2sjis(*nname);
	&sjis2csjis(*nname);
        jcode::sjis2euc(*nname);
        ($name ne $nname);
    };

 }elsif ($comn eq "jistojis"){
    $convmb= sub {
	local(*nname) = @_;
        my $name=$nname;
	jcode::jis2sjis(*nname);
	&sjis2csjis(*nname);
        jcode::sjis2jis(*nname);
        ($name ne $nname);
    };

 }else {
    &Show_Usage();
    exit 1;
}

#========================================================
# main loop routine.
#

@dirlist=();

open(FILEN, "find @ARGV -print |") or die "Cannot open directories\n"; 

while (<FILEN>){
    chomp();
    $nname=$name=$_;
    next  unless (&$convmb(*nname) || $opt_p);
    $nname="$opt_p/$nname" if ($opt_p);
 
    if ( -d $name) {                
	$mode = stat($name)->mode;    # we must make same mode dir,isn't it?
	$mode &= 0777;
	if ($opt_d) {
	    printf STDERR "mkdir %s, %o\n",$nname, $mode;
	    push @dirlist, $name;
	} else {
	    if(mkdir $nname, $mode){
		push @dirlist, $name;
	    } else {
		print STDERR $!;
	    }
	}
    } else {
	if ($opt_d) {
	    print STDERR "convert $name to  $nname\n";
	} else {
	    $res=link $name, $nname;
	    if ($res) {
		unlink $name unless ($opt_l);
	    } else {
		print STDERR "fail link: you have priviliges?\n";
	    }
	    
	}
    }
}
if ($opt_d) {
    my $dirname; 
    while($dirname=shift @dirlist){
	print STDERR "rmdir $dirname\n";
    }
} else {
    if ($#dirlist >=0 ){

## if so many dirs are exist, "exec" will fail(overflow arguments). 
## But, this is faster than perl rmdir function.

	exec 'rmdir', @dirlist; 

## if it returns, it is error on system call execpv(3).	

	unless ($! == E2BIG){ 
	    die "ERROR: Cannot do rmdir bacause: $!\n";
	}
	# retry do it by safe way.
	print STDERR "system call fails, we fall back safe way.\n";
	my $dirname;
	while($dirname=shift @dirlist){
	    rmdir $dirname;       # this may be slow but safe;
	}
    }
}
exit;

#===========================================================
# subroutines.
#
#---------------------------------------------------------
# convert regular sjis to compressed sjis.
# using binary snearch method.
# 
sub sjis2csjis {
    local(*line) = @_;
    $line =~ s/($match{SJIS_C})/&s2c($1)/geo;
}

sub s2c {
    local($char) = @_;
    $mb=ord($char)*256+ord(substr($char,1));
    return $char if ($mb < $sjisconv[0]);	           # checkk if it is 
    return $GETA if ($sjisconv[$sjisconvlen*3+1] < $mb);   # target to conv?
    $min=0;$max=$sjisconvlen;		# OK is will be conv
    while($max >= $min){
	$j=$min+($max-$min)%2;
	if ($sjisconv[$j*3+0] > $mb){		 
	    $max=$j-1;
	} elsif ($mb > $sjisconv[$j*3+1]) {
	    $min=$j+1;
	} else {
	    $mb=$sjisconv[$j*3+2]+$mb-$sjisconv[$j*3+0];
	    $char=sprintf "%c%c",($mb >> 8) & 0xff,$mb & 0xff;
	    break;
	}
    } 
    $char=$GETA if ( $mb > 0xf000);
    $char;
}
#---------------------------------------------------------
#
# convert regular sjis to regular sjis.
# using binary snearch method.
# 
sub sjis2rsjis {
    local(*line) = @_;
    $line =~ s/($match{SJIS_C})/&s2r($1)/geo;
}

sub s2r {
    local($char) = @_;
    $mb=ord($char)*256 + ord(substr($char,1));
    return $char if ($mb < $sjisreg[0]);	# checkk if it is 
    if ($sjisreg[$sjisreglen*3+1] < $mb){	# target to conv?
	$char=$GETA if ($mb > 0xfc4b);
	return $char;
    };
    $min=0;$max=$sjisreglen;		# OK is will be conv
    while($max >= $min){
	    $j=$min+($max-$min)%2;
	    if ($sjisreg[$j*3+0] > $mb){		 
		$max=$j-1;
	    } elsif ($mb > $sjisreg[$j*3+1]) {
		$min=$j+1;
	    } else {					# hit it!
		$mb=$sjisreg[$j*3+2]+$mb-$sjisreg[$j*3+0];
		$char=sprintf "%c%c",($mb >> 8) & 0xff, $mb & 0xff;
		break;
	    }
	} 
$char;
}


#---------------------------------------------------------

sub Show_Usage {
    print <<__EOL__;
This is samba companion tool. smbchartool Ver.1.6.
This product is distoributed under the GNU Public License Vertion 2.

Usage: command [-h][-d][-l][-a <action>] [-p <linkpath>] <convpath>
  -h: show this help message
  -d: debug option. Don't actual move and removing, only display how to do
  -l: make hard link, not move it
  -p: make link to another directory
     (it may be use with -l)
  -a: point out an action

Available command names or actions are....
  euctohex, euctocap, euctojis, euctosjis
  jistohex, jistocap, jistoeuc, jistosjis
  captohex, captoeuc, captojis, captosjis
  hextoeuc, hextocap, hextojis, hextosjis
  sjistohex, sjistocap, sjistojis, sjistoeuc
  sjistosjis, captocap, hextohex, euctoeuc, jistojis

Copyright (c) 2000, Hiroshi MIURA <miura\@samba.gr.jp>
__EOL__
exit;
}
