mangroveorg/datawinners

View on GitHub
datawinners/gsm0338.py

Summary

Maintainability
D
1 day
Test Coverage
# vim: ai ts=4 sts=4 et sw=4 encoding=utf-8

""" Python Character Mapping Codec based on gsm0338 generated from './GSM0338.TXT' with gencodec.py.

    With extra sauce to deal with the 'multibyte' extensions!

"""#"

import codecs
import re

### Codec APIs

#
# Shared funcs
#
def _encode(input,errors='strict'):
    # split to see if we have any 'extended' characters
    runs=unicode_splitter.split(input)

    # now iterate through handling any 'multibyte' ourselves
    out_str=list()
    consumed=0
    extended=extended_encode_map.keys()
    for run in runs:
        if len(run)==1 and run[0] in extended:
            out_str.append(extended_indicator+extended_encode_map[run])
            consumed+=1
        else:
            # pass it to the standard encoder
            out,cons=codecs.charmap_encode(run,errors,encoding_table)
            out_str.append(out)
            consumed+=cons
    return (''.join(out_str),consumed)

def _decode(input,errors='strict'):
    # opposite of above, look for multibye 'marker'
    # and handle it ourselves, pass the rest to the
    # standard decoder

    # split to see if we have any 'extended' characters
    runs = str_splitter.split(input)

    # now iterate through handling any 'multibyte' ourselves
    out_uni = []
    consumed = 0
    for run in runs:
        if len(run)==0:
            # first char was a marker, but we don't care
            # the marker itself will come up in the next run
            continue
        if len(run)==2 and run[0]==extended_indicator:
            try:
                out_uni.append(extended_decode_map[run[1]])
                consumed += 2
                continue
            except KeyError:
                # second char was not an extended, so
                # let this pass through and the marker
                # will be interpreted by the table as a NBSP
                pass

        # pass it to the standard encoder
        out,cons=codecs.charmap_decode(run,errors,decoding_table)
        out_uni.append(out)
        consumed+=cons
    return (u''.join(out_uni),consumed)


class Codec(codecs.Codec):
    def encode(self,input,errors='strict'):
        return _encode(input,errors)

    def decode(self,input,errors='strict'):
        # strip any trailing '\x00's as the standard
        # says trailing ones are _not_ @'s and
        # are in fact blanks
        if input[-1]=='\x00':
            input=input[:-1]
        return _decode(input,errors)

class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        # just use the standard encoding as there is no need
        # to hold state
        return _encode(input,self.errors)[0]

class IncrementalDecoder(codecs.IncrementalDecoder):
    # a little trickier 'cause input _might_ come in
    # split right on the extended char marker boundary
    def __init__(self,errors='strict'):
        codecs.IncrementalDecoder.__init__(self,errors)
        self.last_saw_mark=False

    def decode(self, input, final=False):
        if final:
            # check for final '\x00' which should not
            # be interpreted as a '@'
            if input[-1]=='\x00':
                input=input[:-1]

        # keep track of how many chars we've added or
        # removed to the run to adjust the response from
        # _decode
        consumed_delta=0
        # see if last char was a 2-byte mark
        if self.last_saw_mark:
            # add it back to the current run
            input=extended_indicator+input
            consumed_delta-=1 # 'cause we added a char
            self.last_saw_mark=False # reset
        if input[-1:]==extended_indicator and not final:
            # chop it off
            input=input[:-1]
            consumed_delta+=1 # because we just consumed one char
            self.last_saw_mark=True

            # NOTE: if we are final and last mark is
            # and extended indicator, it will be interpreted
            # as NBSP
        return _decode(input,self.errors)[0]

    def reset(self):
        self.last_saw_mark=False

class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    return codecs.CodecInfo(
        name='gsm0338',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )


### Decoding Tables

# gsm 'extended' character.
# gsm, annoyingly, is MOSTLY 7-bit chars
#
# BUT has 10 'extended' chars represented
# by 2-chars, an idicator, and then one of
# the 10

# first of the 2-chars is indicator
extended_indicator='\x1b'

# second char is the 'extended' character
extended_encode_map = { # Unicode->GSM string
    u'\x0c':'\x0a',  # FORM FEED
    u'^':'\x14',     # CIRCUMFLEX ACCENT
    u'{':'\x28',     # LEFT CURLY BRACKET
    u'}':'\x29',     # RIGHT CURLY BRACKET
    u'\\':'\x2f',    # REVERSE SOLIDUS
    u'[':'\x3c',     # LEFT SQUARE BRACKET
    u'~':'\x3d',     # TILDE
    u']':'\x3e',     # RIGHT SQUARE BRACKET
    u'|':'\x40',     # VERTICAL LINE
    u'\u20ac':'\x65' # EURO SIGN
}

# reverse the map above for decoding
# GSM String->Unicode
uni,gsm=zip(*extended_encode_map.items())
extended_decode_map=dict(zip(gsm,uni))

# splitter
str_splitter=re.compile('(%(ind)s[^%(ind)s])' % { 'ind':extended_indicator })
unicode_splitter=re.compile(u'([%s])' % re.escape(''.join(extended_encode_map.keys())), re.UNICODE)

# the normal 1-char table
decoding_table = (
    u'@'        #  0x00 -> COMMERCIAL AT
    u'\xa3'     #  0x01 -> POUND SIGN
    u'$'        #  0x02 -> DOLLAR SIGN
    u'\xa5'     #  0x03 -> YEN SIGN
    u'\xe8'     #  0x04 -> LATIN SMALL LETTER E WITH GRAVE
    u'\xe9'     #  0x05 -> LATIN SMALL LETTER E WITH ACUTE
    u'\xf9'     #  0x06 -> LATIN SMALL LETTER U WITH GRAVE
    u'\xec'     #  0x07 -> LATIN SMALL LETTER I WITH GRAVE
    u'\xf2'     #  0x08 -> LATIN SMALL LETTER O WITH GRAVE
    u'\xe7'     #  0x09 -> LATIN SMALL LETTER C WITH CEDILLA
    u'\n'       #  0x0A -> LINE FEED
    u'\xd8'     #  0x0B -> LATIN CAPITAL LETTER O WITH STROKE
    u'\xf8'     #  0x0C -> LATIN SMALL LETTER O WITH STROKE
    u'\r'       #  0x0D -> CARRIAGE RETURN
    u'\xc5'     #  0x0E -> LATIN CAPITAL LETTER A WITH RING ABOVE
    u'\xe5'     #  0x0F -> LATIN SMALL LETTER A WITH RING ABOVE
    u'\u0394'   #  0x10 -> GREEK CAPITAL LETTER DELTA
    u'_'        #  0x11 -> LOW LINE
    u'\u03a6'   #  0x12 -> GREEK CAPITAL LETTER PHI
    u'\u0393'   #  0x13 -> GREEK CAPITAL LETTER GAMMA
    u'\u039b'   #  0x14 -> GREEK CAPITAL LETTER LAMDA
    u'\u03a9'   #  0x15 -> GREEK CAPITAL LETTER OMEGA
    u'\u03a0'   #  0x16 -> GREEK CAPITAL LETTER PI
    u'\u03a8'   #  0x17 -> GREEK CAPITAL LETTER PSI
    u'\u03a3'   #  0x18 -> GREEK CAPITAL LETTER SIGMA
    u'\u0398'   #  0x19 -> GREEK CAPITAL LETTER THETA
    u'\u039e'   #  0x1A -> GREEK CAPITAL LETTER XI
    u'\xa0'     #  0x1B -> ESCAPE TO EXTENSION TABLE (or displayed as NBSP, see note above)
    u'\xc6'     #  0x1C -> LATIN CAPITAL LETTER AE
    u'\xe6'     #  0x1D -> LATIN SMALL LETTER AE
    u'\xdf'     #  0x1E -> LATIN SMALL LETTER SHARP S (German)
    u'\xc9'     #  0x1F -> LATIN CAPITAL LETTER E WITH ACUTE
    u' '        #  0x20 -> SPACE
    u'!'        #  0x21 -> EXCLAMATION MARK
    u'"'        #  0x22 -> QUOTATION MARK
    u'#'        #  0x23 -> NUMBER SIGN
    u'\xa4'     #  0x24 -> CURRENCY SIGN
    u'%'        #  0x25 -> PERCENT SIGN
    u'&'        #  0x26 -> AMPERSAND
    u"'"        #  0x27 -> APOSTROPHE
    u'('        #  0x28 -> LEFT PARENTHESIS
    u')'        #  0x29 -> RIGHT PARENTHESIS
    u'*'        #  0x2A -> ASTERISK
    u'+'        #  0x2B -> PLUS SIGN
    u','        #  0x2C -> COMMA
    u'-'        #  0x2D -> HYPHEN-MINUS
    u'.'        #  0x2E -> FULL STOP
    u'/'        #  0x2F -> SOLIDUS
    u'0'        #  0x30 -> DIGIT ZERO
    u'1'        #  0x31 -> DIGIT ONE
    u'2'        #  0x32 -> DIGIT TWO
    u'3'        #  0x33 -> DIGIT THREE
    u'4'        #  0x34 -> DIGIT FOUR
    u'5'        #  0x35 -> DIGIT FIVE
    u'6'        #  0x36 -> DIGIT SIX
    u'7'        #  0x37 -> DIGIT SEVEN
    u'8'        #  0x38 -> DIGIT EIGHT
    u'9'        #  0x39 -> DIGIT NINE
    u':'        #  0x3A -> COLON
    u';'        #  0x3B -> SEMICOLON
    u'<'        #  0x3C -> LESS-THAN SIGN
    u'='        #  0x3D -> EQUALS SIGN
    u'>'        #  0x3E -> GREATER-THAN SIGN
    u'?'        #  0x3F -> QUESTION MARK
    u'\xa1'     #  0x40 -> INVERTED EXCLAMATION MARK
    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
    u'\xc4'     #  0x5B -> LATIN CAPITAL LETTER A WITH DIAERESIS
    u'\xd6'     #  0x5C -> LATIN CAPITAL LETTER O WITH DIAERESIS
    u'\xd1'     #  0x5D -> LATIN CAPITAL LETTER N WITH TILDE
    u'\xdc'     #  0x5E -> LATIN CAPITAL LETTER U WITH DIAERESIS
    u'\xa7'     #  0x5F -> SECTION SIGN
    u'\xbf'     #  0x60 -> INVERTED QUESTION MARK
    u'a'        #  0x61 -> LATIN SMALL LETTER A
    u'b'        #  0x62 -> LATIN SMALL LETTER B
    u'c'        #  0x63 -> LATIN SMALL LETTER C
    u'd'        #  0x64 -> LATIN SMALL LETTER D
    u'e'        #  0x65 -> LATIN SMALL LETTER E
    u'f'        #  0x66 -> LATIN SMALL LETTER F
    u'g'        #  0x67 -> LATIN SMALL LETTER G
    u'h'        #  0x68 -> LATIN SMALL LETTER H
    u'i'        #  0x69 -> LATIN SMALL LETTER I
    u'j'        #  0x6A -> LATIN SMALL LETTER J
    u'k'        #  0x6B -> LATIN SMALL LETTER K
    u'l'        #  0x6C -> LATIN SMALL LETTER L
    u'm'        #  0x6D -> LATIN SMALL LETTER M
    u'n'        #  0x6E -> LATIN SMALL LETTER N
    u'o'        #  0x6F -> LATIN SMALL LETTER O
    u'p'        #  0x70 -> LATIN SMALL LETTER P
    u'q'        #  0x71 -> LATIN SMALL LETTER Q
    u'r'        #  0x72 -> LATIN SMALL LETTER R
    u's'        #  0x73 -> LATIN SMALL LETTER S
    u't'        #  0x74 -> LATIN SMALL LETTER T
    u'u'        #  0x75 -> LATIN SMALL LETTER U
    u'v'        #  0x76 -> LATIN SMALL LETTER V
    u'w'        #  0x77 -> LATIN SMALL LETTER W
    u'x'        #  0x78 -> LATIN SMALL LETTER X
    u'y'        #  0x79 -> LATIN SMALL LETTER Y
    u'z'        #  0x7A -> LATIN SMALL LETTER Z
    u'\xe4'     #  0x7B -> LATIN SMALL LETTER A WITH DIAERESIS
    u'\xf6'     #  0x7C -> LATIN SMALL LETTER O WITH DIAERESIS
    u'\xf1'     #  0x7D -> LATIN SMALL LETTER N WITH TILDE
    u'\xfc'     #  0x7E -> LATIN SMALL LETTER U WITH DIAERESIS
    u'\xe0'     #  0x7F -> LATIN SMALL LETTER A WITH GRAVE
    u'\ufffe'   #  0x80 -> UNDEFINED
    u'\ufffe'   #  0x81 -> UNDEFINED
    u'\ufffe'   #  0x82 -> UNDEFINED
    u'\ufffe'   #  0x83 -> UNDEFINED
    u'\ufffe'   #  0x84 -> UNDEFINED
    u'\ufffe'   #  0x85 -> UNDEFINED
    u'\ufffe'   #  0x86 -> UNDEFINED
    u'\ufffe'   #  0x87 -> UNDEFINED
    u'\ufffe'   #  0x88 -> UNDEFINED
    u'\ufffe'   #  0x89 -> UNDEFINED
    u'\ufffe'   #  0x8A -> UNDEFINED
    u'\ufffe'   #  0x8B -> UNDEFINED
    u'\ufffe'   #  0x8C -> UNDEFINED
    u'\ufffe'   #  0x8D -> UNDEFINED
    u'\ufffe'   #  0x8E -> UNDEFINED
    u'\ufffe'   #  0x8F -> UNDEFINED
    u'\ufffe'   #  0x90 -> UNDEFINED
    u'\ufffe'   #  0x91 -> UNDEFINED
    u'\ufffe'   #  0x92 -> UNDEFINED
    u'\ufffe'   #  0x93 -> UNDEFINED
    u'\ufffe'   #  0x94 -> UNDEFINED
    u'\ufffe'   #  0x95 -> UNDEFINED
    u'\ufffe'   #  0x96 -> UNDEFINED
    u'\ufffe'   #  0x97 -> UNDEFINED
    u'\ufffe'   #  0x98 -> UNDEFINED
    u'\ufffe'   #  0x99 -> UNDEFINED
    u'\ufffe'   #  0x9A -> UNDEFINED
    u'\ufffe'   #  0x9B -> UNDEFINED
    u'\ufffe'   #  0x9C -> UNDEFINED
    u'\ufffe'   #  0x9D -> UNDEFINED
    u'\ufffe'   #  0x9E -> UNDEFINED
    u'\ufffe'   #  0x9F -> UNDEFINED
    u'\ufffe'   #  0xA0 -> UNDEFINED
    u'\ufffe'   #  0xA1 -> UNDEFINED
    u'\ufffe'   #  0xA2 -> UNDEFINED
    u'\ufffe'   #  0xA3 -> UNDEFINED
    u'\ufffe'   #  0xA4 -> UNDEFINED
    u'\ufffe'   #  0xA5 -> UNDEFINED
    u'\ufffe'   #  0xA6 -> UNDEFINED
    u'\ufffe'   #  0xA7 -> UNDEFINED
    u'\ufffe'   #  0xA8 -> UNDEFINED
    u'\ufffe'   #  0xA9 -> UNDEFINED
    u'\ufffe'   #  0xAA -> UNDEFINED
    u'\ufffe'   #  0xAB -> UNDEFINED
    u'\ufffe'   #  0xAC -> UNDEFINED
    u'\ufffe'   #  0xAD -> UNDEFINED
    u'\ufffe'   #  0xAE -> UNDEFINED
    u'\ufffe'   #  0xAF -> UNDEFINED
    u'\ufffe'   #  0xB0 -> UNDEFINED
    u'\ufffe'   #  0xB1 -> UNDEFINED
    u'\ufffe'   #  0xB2 -> UNDEFINED
    u'\ufffe'   #  0xB3 -> UNDEFINED
    u'\ufffe'   #  0xB4 -> UNDEFINED
    u'\ufffe'   #  0xB5 -> UNDEFINED
    u'\ufffe'   #  0xB6 -> UNDEFINED
    u'\ufffe'   #  0xB7 -> UNDEFINED
    u'\ufffe'   #  0xB8 -> UNDEFINED
    u'\ufffe'   #  0xB9 -> UNDEFINED
    u'\ufffe'   #  0xBA -> UNDEFINED
    u'\ufffe'   #  0xBB -> UNDEFINED
    u'\ufffe'   #  0xBC -> UNDEFINED
    u'\ufffe'   #  0xBD -> UNDEFINED
    u'\ufffe'   #  0xBE -> UNDEFINED
    u'\ufffe'   #  0xBF -> UNDEFINED
    u'\ufffe'   #  0xC0 -> UNDEFINED
    u'\ufffe'   #  0xC1 -> UNDEFINED
    u'\ufffe'   #  0xC2 -> UNDEFINED
    u'\ufffe'   #  0xC3 -> UNDEFINED
    u'\ufffe'   #  0xC4 -> UNDEFINED
    u'\ufffe'   #  0xC5 -> UNDEFINED
    u'\ufffe'   #  0xC6 -> UNDEFINED
    u'\ufffe'   #  0xC7 -> UNDEFINED
    u'\ufffe'   #  0xC8 -> UNDEFINED
    u'\ufffe'   #  0xC9 -> UNDEFINED
    u'\ufffe'   #  0xCA -> UNDEFINED
    u'\ufffe'   #  0xCB -> UNDEFINED
    u'\ufffe'   #  0xCC -> UNDEFINED
    u'\ufffe'   #  0xCD -> UNDEFINED
    u'\ufffe'   #  0xCE -> UNDEFINED
    u'\ufffe'   #  0xCF -> UNDEFINED
    u'\ufffe'   #  0xD0 -> UNDEFINED
    u'\ufffe'   #  0xD1 -> UNDEFINED
    u'\ufffe'   #  0xD2 -> UNDEFINED
    u'\ufffe'   #  0xD3 -> UNDEFINED
    u'\ufffe'   #  0xD4 -> UNDEFINED
    u'\ufffe'   #  0xD5 -> UNDEFINED
    u'\ufffe'   #  0xD6 -> UNDEFINED
    u'\ufffe'   #  0xD7 -> UNDEFINED
    u'\ufffe'   #  0xD8 -> UNDEFINED
    u'\ufffe'   #  0xD9 -> UNDEFINED
    u'\ufffe'   #  0xDA -> UNDEFINED
    u'\ufffe'   #  0xDB -> UNDEFINED
    u'\ufffe'   #  0xDC -> UNDEFINED
    u'\ufffe'   #  0xDD -> UNDEFINED
    u'\ufffe'   #  0xDE -> UNDEFINED
    u'\ufffe'   #  0xDF -> UNDEFINED
    u'\ufffe'   #  0xE0 -> UNDEFINED
    u'\ufffe'   #  0xE1 -> UNDEFINED
    u'\ufffe'   #  0xE2 -> UNDEFINED
    u'\ufffe'   #  0xE3 -> UNDEFINED
    u'\ufffe'   #  0xE4 -> UNDEFINED
    u'\ufffe'   #  0xE5 -> UNDEFINED
    u'\ufffe'   #  0xE6 -> UNDEFINED
    u'\ufffe'   #  0xE7 -> UNDEFINED
    u'\ufffe'   #  0xE8 -> UNDEFINED
    u'\ufffe'   #  0xE9 -> UNDEFINED
    u'\ufffe'   #  0xEA -> UNDEFINED
    u'\ufffe'   #  0xEB -> UNDEFINED
    u'\ufffe'   #  0xEC -> UNDEFINED
    u'\ufffe'   #  0xED -> UNDEFINED
    u'\ufffe'   #  0xEE -> UNDEFINED
    u'\ufffe'   #  0xEF -> UNDEFINED
    u'\ufffe'   #  0xF0 -> UNDEFINED
    u'\ufffe'   #  0xF1 -> UNDEFINED
    u'\ufffe'   #  0xF2 -> UNDEFINED
    u'\ufffe'   #  0xF3 -> UNDEFINED
    u'\ufffe'   #  0xF4 -> UNDEFINED
    u'\ufffe'   #  0xF5 -> UNDEFINED
    u'\ufffe'   #  0xF6 -> UNDEFINED
    u'\ufffe'   #  0xF7 -> UNDEFINED
    u'\ufffe'   #  0xF8 -> UNDEFINED
    u'\ufffe'   #  0xF9 -> UNDEFINED
    u'\ufffe'   #  0xFA -> UNDEFINED
    u'\ufffe'   #  0xFB -> UNDEFINED
    u'\ufffe'   #  0xFC -> UNDEFINED
    u'\ufffe'   #  0xFD -> UNDEFINED
    u'\ufffe'   #  0xFE -> UNDEFINED
    u'\ufffe'   #  0xFF -> UNDEFINED
)

encoding_table=codecs.charmap_build(decoding_table)


if __name__ == "__main__":
    """
    Run this as a script for poor-man's unit tests

    """
    isoLatin15_alpha=u" !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJLKMNOPQRSTUVWXYZ[\\]^-`abcdefghijklmnopqrstuvwxyz{|}~¡¢£€¥Š§š©ª«¬®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"

    gsm_alpha=u"\u00A0@£$¥èéùìòçØøÅåΔ_ΦΓΛΩΠΨΣΘΞ^{}\\[~]|\u00A0\u00A0€ÆæßÉ !\"#¤%&'()*+,-./0123456789:;<=>?¡ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑܧ¿abcdefghijklmnopqrstuvwxyzäöñüà\u00A0"

    gsm_alpha_encoded='1b000102030405060708090b0c0e0f101112131415161718191a1b141b281b291b2f1b3c1b3d1b3e1b401b1b1b651c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f1b'

    gsm_alpha_gsm=gsm_alpha_encoded.decode('hex')


    # some simple tests
    print "Assert GSM alphabet, encoded in GSM is correct (unicode->gsm_str)..."
    encoded=_encode(gsm_alpha)[0].encode('hex')
    print encoded
    assert(encoded==gsm_alpha_encoded)
    print "Good"
    print
    print "Assert GSM encoded string converts to correct Unicode (gsm_str->unicode)..."
    assert(_decode(gsm_alpha_gsm)[0]==gsm_alpha)
    print "Good"
    print

    # test Codec objects
    print "Try the codec objects unicode_test_str->encode->decode==unicode_test_str..."
    c=Codec()
    gsm_str,out=c.encode(gsm_alpha)
    assert(c.decode(gsm_str)[0]==gsm_alpha)
    print "Good"
    print
    print "Try the incremental codecs, same test, but loop it..."

    def _inc_encode(ie):
        encoded=list()
        hop=17 # make it something odd
        final=False
        for i in range(0,len(gsm_alpha),hop):
            end=i+hop
            if end>=len(gsm_alpha): final=True
            encoded.append(ie.encode(gsm_alpha[i:end],final))
        return ''.join(encoded)

    enc=IncrementalEncoder()
    assert(_inc_encode(enc)==gsm_alpha_gsm)
    print "Good"
    print
    print "Now do that again with the same encoder to make sure state is reset..."
    enc.reset()
    assert(_inc_encode(enc)==gsm_alpha_gsm)
    print "Good"
    print
    print "Now decode the encoded string back to unicode..."

    def _inc_decode(idec):
        decoded=list()
        # define so we KNOW we hit a mark as last char
        hop=gsm_alpha_gsm.index('\x1b')+1
        final=False
        for i in range(0,len(gsm_alpha_gsm),hop):
            end=i+hop
            if end>=len(gsm_alpha_gsm): final=True
            decoded.append(idec.decode(gsm_alpha_gsm[i:end],final))
        return ''.join(decoded)

    dec=IncrementalDecoder()
    assert(_inc_decode(dec)==gsm_alpha)
    print "Good"
    print
    print "Do it again with some decoder to make sure state is cleared..."
    dec.reset()
    assert(_inc_decode(dec)==gsm_alpha)
    print "Good"
    print