src/mime/base64.c from jens-maus/yam

src/mime/base64.c
Summary

Maintainability

Test Coverage

Issues
/***************************************************************************

 YAM - Yet Another Mailer
 Copyright (C) 1995-2000 Marcel Beck
 Copyright (C) 2000-2022 YAM Open Source Team

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 YAM Official Support Site :  http://www.yam.ch
 YAM OpenSource project    :  http://sourceforge.net/projects/yamos/

 $Id$

***************************************************************************/

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include <proto/exec.h>
#include <proto/codesets.h>

#include "YAM.h"

#include "mime/base64.h"

#include "Config.h"

#include "Debug.h"

// Global variables

// Reverse lookup table for the base64 alphabet, indexed by a 7-bit ASCII
// code: 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62
// and '/' to 63. Every other entry (including the padding character '=')
// is 255, which the decoder treats as "not a base64 character". The table
// only covers codes 0-127, so callers must range-check before indexing.
static const unsigned char index_64[128] =
{
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
  255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
   52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,255,255,255,
  255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
  255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255
};

// some defines that can be useful
#define B64_LINELEN 72    // number of encoded chars per line before base64encode_file() emits a newline
#define B64DEC_BUF  4096  // bytes to use as a base64 file decoding buffer
#define B64ENC_BUF  4095  // bytes to use as a base64 file encoding buffer (must be a multiple of 3)

/*** BASE64 encode/decode routines (RFC 2045) ***/
/// base64encode()
// Encodes 'inlen' bytes starting at 'in' as base64 (no line breaks are
// inserted) into a freshly malloc()ed, NUL-terminated buffer.
//
//   out   - receives the encoded buffer, which the caller has to free().
//           It is set to NULL whenever nothing could be encoded.
//   in    - the raw input bytes; does not have to be NUL-terminated
//   inlen - number of input bytes to encode
//
// Returns the number of base64 characters written (the terminating NUL is
// not counted), or 0 if 'inlen' is 0, the required buffer size cannot be
// represented in a size_t or the allocation failed.
// NOTE(review): the length is returned as 'int', so inputs whose encoded
// size exceeds the range of 'int' cannot be reported correctly.
int base64encode(char **out, const char *in, size_t inlen)
{
  int result = 0;
  char *buffer = NULL;
  size_t groups;

  ENTER();

  // every started group of 3 input bytes produces exactly 4 output chars
  groups = inlen / 3 + ((inlen % 3) != 0 ? 1 : 0);

  // refuse sizes for which "groups * 4 + 1" would wrap around
  if(groups > 0 && groups <= ((size_t)-1 - 1) / 4)
    buffer = malloc(groups * 4 + 1); // +1 for the \0

  if(buffer != NULL)
  {
    // read the input as unsigned bytes so that the shifts below never
    // operate on negative values
    const unsigned char *inp = (const unsigned char *)in;
    char *outp = buffer;
    size_t left = inlen;

    // encode all complete 3-byte groups
    for(; left >= 3; left -= 3, inp += 3)
    {
      *outp++ = basis_64[inp[0] >> 2];
      *outp++ = basis_64[((inp[0] << 4) & 0x30) | (inp[1] >> 4)];
      *outp++ = basis_64[((inp[1] << 2) & 0x3C) | (inp[2] >> 6)];
      *outp++ = basis_64[inp[2] & 0x3F];
    }

    // encode the remaining 1 or 2 bytes and pad the group with '='
    if(left > 0)
    {
      unsigned char b0 = inp[0];
      unsigned char b1 = (left == 2) ? inp[1] : 0;

      *outp++ = basis_64[b0 >> 2];
      *outp++ = basis_64[((b0 << 4) & 0x30) | (b1 >> 4)];

      if(left == 2)
        *outp++ = basis_64[(b1 << 2) & 0x3C];
      else
        *outp++ = '=';

      *outp++ = '=';
    }

    // NUL-terminate the array
    *outp = '\0';

    // return the length of the filled buffer
    result = (int)(outp - buffer);
  }

  // hand the buffer over to the caller (NULL if nothing was encoded), so
  // that the caller never sees a stale or uninitialized pointer
  *out = buffer;

  RETURN(result);
  return result;
}

///
/// base64decode()
// Decodes 'inlen' base64 characters starting at 'in' into a freshly
// malloc()ed, NUL-terminated buffer. The source string doesn't have to be
// NUL-terminated and must not contain whitespace; 'inlen' has to be a
// multiple of 4. Decoding stops as soon as the padding ('=' or '==') is
// found.
//
//   out   - receives the decoded buffer, which the caller has to free().
//           It is set to NULL if 'inlen' is 0 or not a multiple of 4, or
//           if the allocation failed.
//   in    - the base64 encoded characters
//   inlen - number of characters to decode
//
// Returns the number of decoded bytes on success. If the input turned out
// to be malformed (an invalid character, misplaced padding or data
// following the padding) the NEGATED number of bytes decoded so far is
// returned and '*out' still holds those bytes. 0 is returned if nothing
// could be decoded at all.
int base64decode(char **out, const char *in, size_t inlen)
{
  int result = 0;
  unsigned char *buffer;

  ENTER();

  // inlen is a multiple of 4 here, so (inlen / 4) * 3 is exact and cannot
  // overflow like "inlen * 3" could
  if(inlen > 0 && (inlen % 4) == 0 &&
     (buffer = malloc((inlen / 4) * 3 + 1)) != NULL)
  {
    const unsigned char *inp = (const unsigned char *)in;
    unsigned char *outp = buffer;
    int failed = 0;

    SHOWVALUE(DBF_MIME,buffer);

    // process the input one quartet (4 chars -> up to 3 bytes) at a time
    while(inlen >= 4)
    {
      unsigned char c1 = inp[0];
      unsigned char c2 = inp[1];
      unsigned char c3 = inp[2];
      unsigned char c4 = inp[3];
      unsigned char v1 = 0;
      unsigned char v2 = 0;
      unsigned char v3 = 0;
      unsigned char v4 = 0;

      inp += 4;
      inlen -= 4;

      // the first two chars of a quartet always have to be valid b64
      // chars. index_64[] only covers 7-bit codes, thus the range check.
      if(c1 > 127 || (v1 = index_64[c1]) == 255 ||
         c2 > 127 || (v2 = index_64[c2]) == 255)
      {
        failed = 1;
        break; // error
      }

      // put the first decoded byte into the output buffer
      *outp++ = (v1 << 2) | (v2 >> 4);

      // check the third char for the padding character '='
      if(c3 == '=')
      {
        // "xx=" has to be completed by a second padding char
        if(c4 != '=')
          failed = 1;

        // we received the padding string, lets break out here
        break;
      }

      // it isn't the padding char, so is it a valid b64 char instead?
      if(c3 > 127 || (v3 = index_64[c3]) == 255)
      {
        failed = 1;
        break; // error
      }

      // put the second decoded byte into the output buffer
      *outp++ = (v2 << 4) | (v3 >> 2);

      // is the fourth char a padding char?
      if(c4 == '=')
        break; // everything fine

      // an invalid fourth char has to be flagged explicitly: if it is
      // the very last char of the input there is nothing left in inlen
      // that would otherwise signal the problem
      if(c4 > 127 || (v4 = index_64[c4]) == 255)
      {
        failed = 1;
        break; // error
      }

      // decode the third byte as the fourth char is valid
      *outp++ = (v3 << 6) | v4;
    }

    // make sure the string is
    // NUL-terminated
    *outp = '\0';

    // a decoding error, or input that is left over after the padding,
    // is reported as a negative byte count so that the caller can still
    // make use of the partially decoded data
    if(failed != 0 || inlen > 0)
      result = -(int)(outp - buffer);
    else
      result = (int)(outp - buffer);

    *out = (char *)buffer;
  }
  else
    *out = NULL;

  RETURN(result);
  return result;
}

///
/// base64encode_file()
//  Base64 encodes everything that can be read from the stream 'in' and
//  writes the result to the stream 'out'. The input is consumed in chunks
//  of B64ENC_BUF bytes and the encoded text is emitted in lines of
//  B64_LINELEN characters, each followed by a '\n', so that it can be
//  embedded into a mail. If 'convLF' is set every '\n' of the input is
//  expanded to "\r\n" before it gets encoded.
//  Returns the total number of base64 characters produced (the inserted
//  newlines are not counted) or -1 if reading, encoding or writing failed.
long base64encode_file(FILE *in, FILE *out, BOOL convLF)
{
  // room for one input chunk in which every byte is a LF that has to be
  // expanded to CRLF (2 * B64ENC_BUF) plus two spare bytes
  char inbuffer[B64ENC_BUF*2+2];
  char *encbuf = NULL;
  BOOL eof_reached = FALSE;
  int carry = 0;      // bytes kept back for the next round (LF conversion only)
  int line_rest = 0;  // chars still missing to complete the current output line
  int total = 0;      // sum of all encoded characters
  size_t have = 0;    // number of valid bytes in inbuffer

  ENTER();
  SHOWVALUE(DBF_MIME, convLF);

  while(eof_reached == FALSE)
  {
    char *cursor;
    int pending;
    int encoded;

    // the bytes held back in the previous round are still located right
    // behind the data that was encoded, so move them to the front first
    if(carry > 0)
      memmove(inbuffer, &inbuffer[have], carry);

    // fill the buffer up to B64ENC_BUF bytes
    have = fread(&inbuffer[carry], 1, B64ENC_BUF-carry, in) + carry;
    carry = 0;

    // a short read is either the end of the file or an error
    if(have != B64ENC_BUF)
    {
      if(feof(in) == 0)
      {
        E(DBF_MIME, "error on reading data!");

        RETURN(-1);
        return -1;
      }

      D(DBF_MIME, "EOF file at %ld", ftell(in));

      eof_reached = TRUE;

      // nothing left to encode at all
      if(have == 0)
        break;
    }

    if(convLF)
    {
      char snapshot[B64ENC_BUF*2+2];
      size_t src;
      size_t dst = 0;

      // expand from a copy of the data back into inbuffer, putting a CR
      // in front of every LF
      memcpy(snapshot, inbuffer, have);

      for(src = 0; src < have; src++)
      {
        if(snapshot[src] == '\n')
          inbuffer[dst++] = '\r';

        inbuffer[dst++] = snapshot[src];
      }

      have = dst;

      // the expansion may have broken the "multiple of 3" property, so
      // unless this is the last chunk the surplus bytes are kept back
      // for the next round
      if(eof_reached == FALSE)
      {
        carry = have % 3;
        have -= carry;
      }
    }

    // encode the prepared chunk
    encoded = base64encode(&encbuf, inbuffer, have);
    total += encoded;

    if(encoded <= 0)
    {
      E(DBF_MIME, "error on encoding data!");

      RETURN(-1);
      return -1;
    }

    // write the encoded text line by line
    cursor = encbuf;
    pending = encoded;

    while(pending > 0)
    {
      size_t chunk;

      // either complete the line the previous chunk left unfinished or
      // start a new line of at most B64_LINELEN chars
      if(line_rest != 0)
        chunk = (pending < line_rest) ? pending : line_rest;
      else
        chunk = (pending >= B64_LINELEN) ? B64_LINELEN : pending;

      if(fwrite(cursor, 1, chunk, out) != chunk)
      {
        E(DBF_MIME, "error on writing data!");

        free(encbuf);

        RETURN(-1);
        return -1;
      }

      pending -= chunk;
      cursor += chunk;

      if(line_rest == 0 && chunk < B64_LINELEN && eof_reached == FALSE)
      {
        // a fresh line ended up short and more data will follow, so no
        // newline yet; remember how many chars the line still needs
        line_rest = B64_LINELEN-chunk;
      }
      else
      {
        // terminate the line, except for the very last one of the file
        if((pending > 0 || eof_reached == FALSE) && fputc('\n', out) == EOF)
        {
          E(DBF_MIME, "error on writing newline");

          free(encbuf);

          RETURN(-1);
          return -1;
        }

        line_rest = 0;
      }
    }

    free(encbuf);
    encbuf = NULL;
  }

  RETURN(total);
  return total;
}

///
/// base64decode_file()
//  Decodes a base64 encoded stream. The data is read from 'in' in chunks of
//  B64DEC_BUF bytes, stripped of all whitespace, decoded and written to
//  'out' with fwrite().
//
//    in         - stream to read the base64 text from
//    out        - stream the decoded data is written to
//    srcCodeset - codeset of the decoded text or NULL. If 'isText' is set
//                 and the codeset isn't "utf-8" the decoded data is
//                 converted to UTF-8 before it is written.
//    isText     - TRUE if the decoded data is text; the codeset handling is
//                 only applied to text
//    convCRLF   - TRUE to reduce every "\r\n" of the decoded data to "\n"
//
//  Returns the total number of bytes written. In case of an error it
//  returns -1 and in case a problem (e.g. a short item count) was found
//  during decoding it returns -2, asking the caller to still consider the
//  data decoded (however it should be treated with care).
long base64decode_file(FILE *in, FILE *out,
                       struct codeset *srcCodeset, BOOL isText, BOOL convCRLF)
{
  char inbuffer[B64DEC_BUF+1];
  char *outbuffer = NULL;
  char ungetbuf[3];
  long decodedChars = 0;
  size_t next_unget = 0;
  BOOL eof_reached = FALSE;
  BOOL problemDuringDecode = FALSE;

  ENTER();

  D(DBF_MIME, "codeset '%s'", srcCodeset != NULL ? srcCodeset->name : "none");

  while(eof_reached == FALSE)
  {
    int outLength = 0;
    char *sptr;
    char *dptr;
    size_t read;
    size_t todo;

    // if we do have some unget chars lets copy them first at the
    // beginning of the inbuffer
    if(next_unget > 0)
      memcpy(inbuffer, ungetbuf, next_unget);

    // do a binary read of ~4096 chunks
    read = fread(&inbuffer[next_unget], sizeof(char), B64DEC_BUF-next_unget, in);

    // on a short item count we check for a potential
    // error and return immediately.
    if(read != B64DEC_BUF-next_unget)
    {
      if(feof(in) != 0)
      {
        D(DBF_MIME, "EOF file at %ld", ftell(in));

        eof_reached = TRUE; // we found an EOF

        // if the last read was zero we can exit immediately
        if(read == 0 && next_unget == 0)
          break;
      }
      else
      {
        E(DBF_MIME, "error on reading data!");

        // an error occurred, lets return -1
        RETURN(-1);
        return -1;
      }
    }

    // increase/reset the counters
    read += next_unget;
    next_unget = 0;

    // now that we have read 4096 bytes into our buffer
    // we have to iterate through this buffer and "eliminate"
    // white spaces which aren't normally part of a base64 encoded
    // string and can be safely skipped without
    // corrupting the decoded file.
    sptr = inbuffer;
    dptr = inbuffer;
    todo = read;

    while(todo > 0)
    {
      // isspace() must not be called with a negative value, so the
      // (possibly signed) char has to be converted to unsigned char first
      if(!isspace((unsigned char)*sptr))
      {
        *dptr = *sptr;
        dptr++;
      }
      else read--;

      sptr++;
      todo--;
    }

    // if we end up with read == 0 we had only spaces in our
    // source string, so lets skip to the next iteration
    if(read == 0)
      continue;

    // before we are going to decode the string we have to make sure
    // that the length of the encoded string is a multiple of 4, as
    // base64 characters are always decoded in groups of 4.
    next_unget = read % 4;
    if(next_unget > 0)
    {
      if(eof_reached == FALSE)
      {
        read -= next_unget;
        memcpy(ungetbuf, &inbuffer[read], next_unget);

        // the chunk contained less than 4 significant chars, so there
        // is nothing to decode yet. The chars are kept in ungetbuf and
        // will be processed together with the next chunk.
        if(read == 0)
          continue;
      }
      else
      {
        // NOTE(review): base64decode() rejects a length that is not a
        // multiple of 4, so dangling chars at EOF make this function
        // return -1 below instead of -2 - confirm this is intended.
        W(DBF_MIME, "unget chars at EOF???");

        problemDuringDecode = TRUE;
      }
    }

    // now that we have a whitespace free somewhat base64 encoded
    // string, we can call the base64decode() function to finally
    // decode the string
    if((outLength = base64decode(&outbuffer, inbuffer, read)) <= 0)
    {
      E(DBF_MIME, "error on decoding: %ld %ld", read, outLength);

      if(outLength < 0)
      {
        // we faced a short item count. That can actually be a sign that the text
        // in question is not a fully base64 compliant string. However, to
        // at least display the text to the user we redefine the outLength and
        // let the write function output that string (even if not correctly
        // decoded)
        outLength = -outLength;

        problemDuringDecode = TRUE;
      }
      else
      {
        // nothing could be decoded at all
        free(outbuffer);
        RETURN(-1);
        return -1;
      }
    }

    // in case the user wants us to detect the correct cyrillic codeset
    // we do it now, but just if the source codeset isn't UTF-8
    if(C->DetectCyrillic == TRUE && isText == TRUE)
    {
      if(srcCodeset == NULL || (srcCodeset->name != NULL && stricmp(srcCodeset->name, "utf-8") != 0))
      {
        struct codeset *cs = CodesetsFindBest(CSA_Source,         outbuffer,
                                              CSA_SourceLen,      outLength,
                                              CSA_CodesetFamily,  CSV_CodesetFamily_Cyrillic,
                                              TAG_DONE);

        if(cs != NULL && cs != srcCodeset)
        {
          // the detected codeset is the one being used from now on
          D(DBF_MIME, "using codeset '%s' instead of '%s'", cs->name, srcCodeset != NULL ? srcCodeset->name : "none");
          srcCodeset = cs;
        }
      }
    }

    // if the caller supplied a source codeset, we have to
    // make sure we convert our outbuffer before writing it out
    // to the file in UTF8, but we must not touch binary/non-text data
    if(isText == TRUE && srcCodeset != NULL && stricmp(srcCodeset->name, "utf-8") != 0)
    {
      ULONG strLen = 0;

      UTF8 *str = CodesetsUTF8Create(CSA_Source,          outbuffer,
                                     CSA_SourceLen,       outLength,
                                     CSA_SourceCodeset,   srcCodeset,
                                     CSA_DestLenPtr,      &strLen,
                                     TAG_DONE);

      if(str != NULL && strLen > 0)
      {
        // if we end up here we successfully converted the
        // source buffer to an UTF8 encoded destination buffer
        dptr = (char *)str;
        outLength = strLen;
      }
      else
      {
        W(DBF_MIME, "error while trying to convert base64decoded string to UTF8");

        // an empty conversion result is of no use, but it still has
        // to be given back to codesets.library
        if(str != NULL)
          CodesetsFreeA(str, NULL);

        dptr = outbuffer;
      }
    }
    else
      dptr = outbuffer;

    if(dptr != NULL)
    {
      // if the user also wants to convert CRLF to LF only,
      // we do it right now
      if(convCRLF == TRUE)
      {
        long r;
        char *rc = dptr;
        char *wc = dptr;

        // a '\r' which is the very last byte of this chunk is kept as
        // it is, because the byte following it cannot be inspected here
        for(r=0; r < outLength; r++, rc++)
        {
          // check if this is a CRLF
          if(*rc == '\r' &&
             outLength-r > 1 && rc[1] == '\n')
          {
            // if so, skip the \r
            continue;
          }
          else
          {
            // if no CRLF is found, lets copy
            // the plain character
            *wc = *rc;

            // increase the write counter
            wc++;
          }
        }

        // make sure we reduce outLength by the
        // number of "overjumped" chars.
        outLength -= (rc-wc);
      }

      // now that we got the string decoded we write it into
      // our file
      if(fwrite(dptr, sizeof(char), (size_t)outLength, out) != (size_t)outLength)
      {
        E(DBF_MIME, "error on writing data!");

        // an error occurred while writing, so release the
        // buffers of this iteration before bailing out
        if(dptr != outbuffer)
          CodesetsFreeA(dptr, NULL);

        free(outbuffer);

        RETURN(-1);
        return -1;
      }
    }

    // in case the dptr buffer was allocated by codesets.library,
    // we have to free it now
    if(dptr != outbuffer && dptr != NULL)
      CodesetsFreeA(dptr, NULL);

    // forget the pointer right away so that it can never be
    // freed a second time
    free(outbuffer);
    outbuffer = NULL;

    // increase the decodedChars counter
    decodedChars += outLength;
  }

  // if there was a problem during
  // the decoding phase we go and warn the user with a
  // return value of -2
  if(problemDuringDecode == TRUE)
    decodedChars = -2;

  RETURN(decodedChars);
  return decodedChars;
}

///