hackedteam/vector-rmi

View on GitHub
wps/wbxml2/wbxml_charset.c

Summary

Maintainability
Test Coverage
/*
 * libwbxml, the WBXML Library.
 * Copyright (C) 2002-2005 Aymerick Jehanne <aymerick@jehanne.org>
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 * 
 * LGPL v2.1: http://www.gnu.org/copyleft/lesser.txt
 * 
 * Contact: libwbxml@aymerick.com
 * Home: http://libwbxml.aymerick.com
 */
 
/**
 * @file wbxml_charset.c
 * @ingroup wbxml_charset
 *
 * @author Aymerick Jehanne <libwbxml@aymerick.com>
 * @date 04/03/24
 *
 * @brief Charset Functions
 */

#include "wbxml.h"


/* Structures */

/** WBXML Charset */
typedef struct WBXMLCharsetEntry_s {
    const WB_TINY       *name;     /**< Charset Name */
    WBXMLCharsetMIBEnum  mib_enum; /**< Charset MIBEnum Value */
} WBXMLCharsetEntry;


/* Globals */

/**
 * @brief Charset table
 * @note  From http://www.iana.org/assignments/character-sets
 */
static const WBXMLCharsetEntry wbxml_charset_entries[] =
{
    { "US-ASCII",        WBXML_CHARSET_US_ASCII        },
    { "ISO-8859-1",      WBXML_CHARSET_ISO_8859_1      },
    { "ISO-8859-2",      WBXML_CHARSET_ISO_8859_2      },
    { "ISO-8859-3",      WBXML_CHARSET_ISO_8859_3      },
    { "ISO-8859-4",      WBXML_CHARSET_ISO_8859_4      },
    { "ISO-8859-5",      WBXML_CHARSET_ISO_8859_5      },
    { "ISO-8859-6",      WBXML_CHARSET_ISO_8859_6      },
    { "ISO-8859-7",      WBXML_CHARSET_ISO_8859_7      },
    { "ISO-8859-8",      WBXML_CHARSET_ISO_8859_8      },
    { "ISO-8859-9",      WBXML_CHARSET_ISO_8859_9      },
    { "Shift_JIS",       WBXML_CHARSET_SHIFT_JIS       },
    { "UTF-8",           WBXML_CHARSET_UTF_8           },
    { "ISO-10646-UCS-2", WBXML_CHARSET_ISO_10646_UCS_2 },
    { "UTF-16",          WBXML_CHARSET_UTF_16          },
    { "Big5",            WBXML_CHARSET_BIG5            }
};


/* Private Functions Prototypes */
static WB_BOOL binary_search(const WB_TINY *in_buf,
                             WB_ULONG       in_buf_len,
                             const WB_TINY *in_seq,
                             WB_ULONG       in_seq_len,
                             WB_ULONG      *out_pos);


/***************************************************
 *    Public Functions
 */

WBXML_DECLARE(WB_BOOL) wbxml_charset_get_mib(const WB_TINY       *name,
                                             WBXMLCharsetMIBEnum *mib_enum)
{
    WB_ULONG i = 0;
  
    for (i = 0; i < WBXML_TABLE_SIZE(wbxml_charset_entries); i++) {
        if (WBXML_STRCASECMP(name, wbxml_charset_entries[i].name) == 0) {
            if (mib_enum != NULL) {
                *mib_enum = wbxml_charset_entries[i].mib_enum;
            }

            return TRUE;
        }
    }

    return FALSE;
}


WBXML_DECLARE(WB_BOOL) wbxml_charset_get_name(WBXMLCharsetMIBEnum   mib_enum,
                                              const WB_TINY       **name)
{
    WB_ULONG i = 0;
  
    for (i = 0; i < WBXML_TABLE_SIZE(wbxml_charset_entries); i++) {
        if (mib_enum == wbxml_charset_entries[i].mib_enum) {
            if (name != NULL) {
                *name = wbxml_charset_entries[i].name;
            }
      
            return TRUE;
        }
    }
  
    return FALSE;
}


WBXML_DECLARE(WBXMLError) wbxml_charset_conv(const WB_TINY        *in_buf,
                                             WB_ULONG             *io_bytes,
                                             WBXMLCharsetMIBEnum   in_charset,
                                             WBXMLBuffer         **out_buf,
                                             WBXMLCharsetMIBEnum   out_charset)
{
    /**************************************************
     * First, check for simple US-ASCII / UTF-8 cases
     */

    /* Are we dealing with US-ASCII or UTF-8 ? */
    if (((in_charset  == WBXML_CHARSET_US_ASCII) || (in_charset  == WBXML_CHARSET_UTF_8)) &&
        ((out_charset == WBXML_CHARSET_US_ASCII) || (out_charset == WBXML_CHARSET_UTF_8)))
    {
        /* Create a static buffer */
        if ((*out_buf = wbxml_buffer_sta_create_from_cstr(in_buf)) == NULL) {
            return WBXML_ERROR_NOT_ENOUGH_MEMORY;
        }

        /* US-ASCII and UTF-8 are NULL terminated */
        *io_bytes -= WBXML_STRLEN(in_buf) + 1;
    
        return WBXML_OK;
    }
  
    /**************************************
     * Ok guys, we really have to convert
     */
  
#if defined( HAVE_ICONV )
  
    {
        /**********************
         * The iconv way
         */
    
        const WB_TINY * charset_to   = NULL;
        const WB_TINY * charset_from = NULL;
        WB_TINY       * tmp_buf      = NULL;
        WB_TINY       * tmp_ptr      = NULL;
        WB_ULONG        tmp_buf_len  = 0;
        WB_ULONG        tmp_len_left = 0;
        WBXMLError      ret          = WBXML_OK;
        iconv_t         cd           = 0;
        WB_UTINY        last_char    = 0;
    
        /* Get Charsets names */
        if (!wbxml_charset_get_name(in_charset, &charset_from)) {
            return WBXML_ERROR_CHARSET_UNKNOWN;
        }
  
        if (!wbxml_charset_get_name(out_charset, &charset_to)) {
            return WBXML_ERROR_CHARSET_UNKNOWN;
        }
    
        /* Init iconv */
        if ((cd = iconv_open(charset_to, charset_from)) == (iconv_t)(-1))
        {
            /* Init failed */
            return WBXML_ERROR_CHARSET_CONV_INIT;
        }
    
        /* Allocate maximum result buffer (4 bytes unicode) */
        tmp_len_left = tmp_buf_len = 4 * (sizeof(WB_TINY) * (*io_bytes));
    
        if ((tmp_buf = (WB_TINY *) wbxml_malloc(tmp_buf_len)) == NULL) {
            iconv_close(cd);
            return WBXML_ERROR_NOT_ENOUGH_MEMORY;
        }

        tmp_ptr = tmp_buf;
    
        /* Convert ! */
        (void) iconv(cd,
                     &in_buf,
                     (size_t*)io_bytes,
                     &tmp_buf,
                     (size_t*)&tmp_len_left);

        /** @todo Check errno (but it doesn't seems to work on windows) */

        if (tmp_buf_len > tmp_len_left) {
            /* Create result buffer */
            if ((*out_buf = wbxml_buffer_create(tmp_ptr,
                                                tmp_buf_len - tmp_len_left,
                                                tmp_buf_len - tmp_len_left)) == NULL)
            {
                /* Not enough memory */
                ret = WBXML_ERROR_NOT_ENOUGH_MEMORY;
            }

            /* Remove trailing NULL char */
            wbxml_buffer_remove_trailing_zeros(out_buf);
        }
        else
        {
            /* Not converted */
            ret = WBXML_ERROR_CHARSET_CONV;
        }
    
        /* Shutdown iconv */
        iconv_close(cd);
    
        /* Clean-up */
        wbxml_free(tmp_ptr);

        return ret;
    }
  
#else
  
    {
        /***************************************************
         * Add your own charset conversion function here !
         */
    
        return WBXML_ERROR_NO_CHARSET_CONV;
    }
  
#endif /* HAVE_ICONV */
}


WBXML_DECLARE(WBXMLError) wbxml_charset_conv_term(const WB_TINY        *in_buf,
                                                  WB_ULONG             *io_bytes,
                                                  WBXMLCharsetMIBEnum   in_charset,
                                                  WBXMLBuffer         **out_buf,
                                                  WBXMLCharsetMIBEnum   out_charset)
{
    WB_ULONG   buf_len  = 0;
    WB_ULONG   new_len  = 0;
    WB_ULONG   term_len = 0;
    WBXMLError ret      = WBXML_OK;
  
    /* Find length of input buffer */
    switch (in_charset)
    {
    case WBXML_CHARSET_ISO_10646_UCS_2 :
    case WBXML_CHARSET_UTF_16 :
        /* Terminated by two NULL char ("\0\0") */
        term_len = 2;

        if (!binary_search(in_buf, *io_bytes, "\0\0", 2, &buf_len)) {
            return WBXML_ERROR_CHARSET_STR_LEN;
        }

        /* Add termination bytes length */
        buf_len += term_len;
        break;
    
    default :
        /* Terminated by a simple NULL char ('\0') */
        term_len = 1;

        buf_len = WBXML_STRLEN(in_buf) + term_len;
        break;
    }

    /* Check length found */
    if (buf_len > *io_bytes) {
        return WBXML_ERROR_CHARSET_STR_LEN;
    }

    /* Use a temporary length var (because it is decreased) */
    new_len = buf_len;
  
    /* Convert ! */
    ret = wbxml_charset_conv(in_buf, 
                             &new_len,
                             in_charset,
                             out_buf,
                             out_charset);
  
    /* Set input buffer length */           
    *io_bytes = buf_len;
  
    return ret;
}


/***************************************************
 *    Private Functions
 */

/**
 * Binary search of a sequence of bytes into a buffer
 *
 * @param in_buf     Buffer to search in
 * @param in_buf_len Length of input buffer
 * @param in_seq     Sequence to search
 * @param in_seq_len Length of sequence
 * @param out_pos    Index of Sequence into Buffer
 * @return TRUE if found, FALSE otherwise
 */
static WB_BOOL binary_search(const WB_TINY *in_buf,
                             WB_ULONG       in_buf_len,
                             const WB_TINY *in_seq,
                             WB_ULONG       in_seq_len,
                             WB_ULONG      *out_pos)
{
    /** @todo binary_search() / See wbxml_buffer_search() */

    return FALSE;
}