# This file is charset2otp.py
#
# (c) 2004. Javier Bezos. License: LPPL.
#
# This file creates otp/ocp files for the charset
# mechanism of the Mem package.

import sys, os

def writeenc(enc, name=None, begin=None, end=None):
    """Write ``<name>.mtp`` for a single-byte charset, then run mtp2ocp.py.

    Every byte value in ``begin..end`` (inclusive) is decoded with the
    Python codec ``enc``; the resulting code points form a lookup table
    that is written, together with fixed utf-8/escape handling rules, to
    ``<name>.mtp`` in the current directory.  Afterwards
    ``python ./mtp2ocp.py <name>`` is run through the shell.

    Parameters:
      enc   -- Python codec name used to decode each byte.
      name  -- base name of the output file and of the table
               (default: same as ``enc``).
      begin -- first byte value in the table (default: 0x80).
      end   -- last byte value in the table (default: 0xFF).  Passing 0
               suppresses both the table and the rule that uses it.

    Bytes the codec cannot decode become U+FFFD ('replace' error mode).
    Uses only constructs available in both Python 2.6+ and Python 3.
    """
    if name is None:
        name = enc
    if begin is None:
        begin = 0x80
    if end is None:
        end = 0xFF

    # Same output as the Python 2 statement "print 'Creating', name".
    sys.stdout.write('Creating %s\n' % name)

    # One cell per byte value.  A cell whose byte value is a multiple of 8
    # starts a new line; other cells follow the previous one after a space.
    cells = []
    for code in range(begin, end + 1):
        # bytearray.decode returns a text string on Python 2 and 3 alike.
        char = bytearray((code,)).decode(enc, 'replace')
        if code % 8 == 0:
            lead = '\n'
        elif cells:
            lead = ' '
        else:
            lead = ''
        cells.append('%s@"%04X' % (lead, ord(char)))
    #end
    table = ','.join(cells)

    # sys.version may contain a line break on some builds; collapse all
    # whitespace so the whole string stays on its single commented line.
    version_line = ' '.join(sys.version.split())

    # 'with' guarantees the file is closed even if a write fails.
    with open('%s.mtp' % name, 'w') as of:

        of.write('%% This file is %s.mtp\n'
                 '%%\n'
                 '%% (c) 2004 Javier Bezos. License: LPPL\n'
                 '%% Please, send bug reports and comments to:\n'
                 '%% jbezos at wanadoo dot es\n'
                 '%%\n'
                 '%% It has been generated with the script charset2otp.py\n'
                 '%% and the built-in Unicode data in Python version:\n'
                 '%% %s\n\n'
                 % (name, version_line))

        of.write('input:  1;\n'
                 'output: 2;\n\n')

        if end != 0:
            of.write('tables:\n\n'
                     '%s[@"%X] = {%s};\n\n' % (name, end - begin + 1, table))
        #end

        # No %-formatting is applied to this literal, so it is written
        # exactly as spelled here (including the doubled percent signs).
        of.write('states: utf8;\n\n'
                 'expressions:\n\n'
                 '%% Built-in utf-8 parsing, so that we can escape to it,\n'
                 '%% as described below.\n\n'
                 '<utf8> @"1B          => <pop:>;\n'
                 '<utf8> @"00-@"7F     => \\1;\n'
                 '<utf8> (@"C0-@"DF)(@"80-@"BF)\n'
                 '                     => #(((\\1-@"C0)*@"40) + (\\2-@"80));\n'
                 '<utf8> (@"E0-@"EF)(@"80-@"BF)(@"80-@"BF)\n'
                 '                     => #(((\\1-@"E0)*@"1000) + ((\\2-@"80)*@"40) + (\\3-@"80));\n'
                 '<utf8> .             => @"FFFD;\n\n'
                 '%% Use <esc> ("1B) to mark the beginning of a Unicode text\n'
                 '%% thus escaping from the current encoding.  00 means a\n'
                 '%% uft-16 code, 01 a utf-16 text, 02 a utf-8 text.  <esc>\n'
                 '%% ends the block, too.  Note we can be sure "1B is not\n'
                 '%% present in a utf-8 string, but that does not hold for\n'
                 '%% utf-16.\n\n'
                 '@"1B @"00 ^(@"1B)<1,> @"1B => "\\UseMemUnichar{" \\(*+2-1) "}";\n'
                 '@"1B @"01 ^(@"1B)<1,> @"1B => \\(*+2-1);\n'
                 '@"1B @"02                  => <push: utf8>;\n\n')
        if end != 0:
            # Map every byte in begin..end through the table written above.
            of.write('@"%X-@"%X => #(%s[\\1 - @"%X]);\n'
                     % (begin, end, name, begin))
        #end

        # Anything not matched by an earlier rule is passed through as is.
        of.write('.         => \\1;\n')
    #end

    # NOTE: name is interpolated unquoted into a shell command; the calls
    # in this script only pass fixed, shell-safe names.
    os.system('python ./mtp2ocp.py %s' % name)
    #os.remove('@%s.otp' % name)
#end

##### Author's remark (translated from Spanish): WRONG when end = FF

# Charsets to generate, in order.  Each entry pairs the Python codec name
# with the keyword arguments forwarded to writeenc.
_CHARSETS = (
    ('cp1251', {}),
    ('cp1252', {'end': 0x9F}),
    ('cp1253', {}),
    ('cp1256', {}),
    ('cp1257', {}),
    ('mac_roman', {'name': 'macstd'}),
    ('iso8859_1', {'name': 'isolat1', 'end': 0}),
    ('iso8859_2', {'name': 'isolat2', 'begin': 0xA0}),
    # ('iso8859_3', {'name': 'isolat3', 'begin': 0xA0}),  # Deprecated
    ('iso8859_4', {'name': 'isolat4', 'begin': 0xA0}),
    ('iso8859_5', {'name': 'isocyr', 'begin': 0xA0}),
    ('iso8859_6', {'name': 'isoara', 'begin': 0xA0}),
    ('iso8859_7', {'name': 'isoell', 'begin': 0xA0}),
    ('koi8_r', {'name': 'koi8ru', 'begin': 0xA0}),
    ('koi8_u', {'name': 'koi8uk', 'begin': 0xA0}),
    # ('ascii', {}),  # Special case
    # ('asmo', {}),   # Not available in Python
)

for _codec, _options in _CHARSETS:
    writeenc(_codec, **_options)
#end


