src/mime/uucode.c from jens-maus/yam

src/mime/uucode.c
Summary

Maintainability

Test Coverage

Issues
/***************************************************************************

 YAM - Yet Another Mailer
 Copyright (C) 1995-2000 Marcel Beck
 Copyright (C) 2000-2022 YAM Open Source Team

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 YAM Official Support Site :  http://www.yam.ch
 YAM OpenSource project    :  http://sourceforge.net/projects/yamos/

 $Id$

***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <proto/exec.h>
#include <proto/codesets.h>

#include "YAM.h"

#include "mime/qprintable.h"

#include "Config.h"

#include "Debug.h"

// buffer geometry and character mapping shared by the UU encoder and decoder
#define UUENC_OWIDTH (60+3)                 // (63)   length of one encoded line: 60 data chars +3 for len+checksum+newline char
#define UUENC_IWIDTH ((UUENC_OWIDTH-3)/4*3) // (45)   number of raw input bytes that are encoded into one line
#define UUENC_OBUF   (UUENC_OWIDTH*100)     // (6300) bytes to use as a uucode output buffer (100 full lines)
#define UUENC_IBUF   (UUENC_IWIDTH*100)     // (4500) bytes to use as a uucode input buffer (100 full lines)
#define UUDEC_IBUF   UUENC_OBUF             // (6300) bytes to use as a uucode input decoding buffer
#define UUDEC_OBUF   UUENC_IBUF             // (4500) bytes to use as a uucode output decoding buffer
#define UUDEC_BUF    4096                   // bytes to use as a uucode file decoding buffer
#define UUMAX_CHAR   64                     // number of distinct 6 bit values (c + 32 = 96); also the modulus of the line checksum

// maps the lower 6 bits of (c) to a printable char by adding ' '; a value
// of zero is written as '`' instead of a space.
// NOTE: (c) is evaluated more than once, so it must be free of side effects.
#define UUENCODE_CHAR(c)  (((c) & 0x3F) ? ((c) & 0x3F) + ' ' : '`')
// maps an encoded char back to its 6 bit value; both ' ' and '`' yield zero
#define UUDECODE_CHAR(c)  (((c) - ' ') & 0x3F)

/*** UU encode/decode routines ***/
/// uuencode_file()
// UUencodes everything that can be read from 'in' and writes the encoded
// lines to 'out'. The input is consumed in chunks of UUENC_IBUF bytes and
// every chunk is cut into lines of up to UUENC_IWIDTH raw bytes. A line
// consists of a length char, the encoded groups (3 raw bytes -> 4 chars),
// an additional checksum char and a newline. The checksum char is an
// extension to the classic BSD format which allows a decoder to verify
// the integrity of each line.
// Neither the "begin" nor the terminating "end" line is written here.
// Returns the number of encoded data chars (four per group, without the
// length/checksum/newline chars) or -1 if reading or writing failed.
long uuencode_file(FILE *in, FILE *out)
{
  unsigned char inbuffer[UUENC_IBUF+1];  // raw data of the current chunk
  unsigned char outbuffer[UUENC_OBUF+1]; // encoded lines waiting to be written
  unsigned char *dst = outbuffer;
  long encoded_chars = 0;
  BOOL eof_reached = FALSE;

  ENTER();

  while(eof_reached == FALSE)
  {
    const unsigned char *src = inbuffer;
    size_t remaining = fread(inbuffer, 1, UUENC_IBUF, in);

    // a short read is either the end of the file or a real error
    if(remaining != UUENC_IBUF)
    {
      if(feof(in) == 0)
      {
        E(DBF_MIME, "error on reading data!");

        RETURN(-1);
        return -1;
      }

      D(DBF_MIME, "EOF file at %ld", ftell(in));

      eof_reached = TRUE;

      // nothing was read at all, so there is nothing left to encode
      if(remaining == 0)
        break;
    }

    // cut the chunk into lines
    while(remaining > 0)
    {
      // a line carries UUENC_IWIDTH raw bytes or whatever is left of the chunk
      size_t line_bytes = (remaining >= UUENC_IWIDTH) ? UUENC_IWIDTH : remaining;
      size_t groups = line_bytes / 3; // complete 3 byte groups of this line
      size_t tail = line_bytes % 3;   // 0-2 bytes which don't fill a group
      int checksum = 0;

      // every line starts with its encoded raw length
      *dst++ = UUENCODE_CHAR(line_bytes);

      // three raw bytes are spread over four 6 bit chars
      for(; groups > 0; groups--)
      {
        unsigned char c1 = src[0];
        unsigned char c2 = src[1];
        unsigned char c3 = src[2];

        src += 3;

        *dst++ = UUENCODE_CHAR(c1 >> 2);
        *dst++ = UUENCODE_CHAR((c1 << 4) | ((c2 >> 4) & 0xF));
        *dst++ = UUENCODE_CHAR((c2 << 2) | ((c3 >> 6) & 0x3));
        *dst++ = UUENCODE_CHAR(c3);

        checksum += (c1+c2+c3) % UUMAX_CHAR;
        encoded_chars += 4;
      }

      // an incomplete group is padded with zero bits and its
      // fourth char is always the encoded zero
      if(tail > 0)
      {
        unsigned char c1 = src[0];
        unsigned char c2 = (tail > 1) ? src[1] : 0;

        src += tail;

        *dst++ = UUENCODE_CHAR(c1 >> 2);
        *dst++ = UUENCODE_CHAR((c1 << 4) | ((c2 >> 4) & 0xF));
        *dst++ = UUENCODE_CHAR(c2 << 2);
        *dst++ = '`';

        checksum += (c1+c2) % UUMAX_CHAR;
        encoded_chars += 4;
      }

      // finish the line with the checksum char and a newline
      *dst++ = UUENCODE_CHAR(checksum);
      *dst++ = '\n';

      remaining -= line_bytes;

      // write out the collected lines as soon as the buffer is full. Once the
      // end of the input has been seen every line is written immediately.
      if(eof_reached == TRUE || dst-outbuffer >= UUENC_OBUF)
      {
        size_t pending = dst-outbuffer;

        // we must never have written beyond the end of the output buffer
        ASSERT(pending <= UUENC_OBUF);

        if(fwrite(outbuffer, 1, pending, out) != pending)
        {
          E(DBF_MIME, "error on writing data!");

          RETURN(-1);
          return -1;
        }

        // start over with an empty output buffer
        dst = outbuffer;
      }
    }
  }

  RETURN(encoded_chars);
  return encoded_chars;
}

///
/// uudecode_flush()
// Writes 'length' decoded bytes from 'buffer' to 'out'. For text data the
// bytes are converted to UTF8 first if the source codeset is known and is
// not UTF8 already. If the user enabled the cyrillic autodetection the
// best matching cyrillic codeset is searched first and, if one is found,
// stored in '*srcCodeset' so that it is used for all following calls, too.
// If the conversion fails the unconverted data is written instead.
// Returns 0 on success and -1 if the data could not be written.

// the codeset is handled through pointers only in this prototype
struct codeset;

static int uudecode_flush(unsigned char *buffer, size_t length, FILE *out, struct codeset **srcCodeset, BOOL isText)
{
  unsigned char *dptr = buffer;
  size_t todo = length;
  int result = 0;

  ENTER();

  // in case the user wants us to detect the correct cyrillic codeset
  // we do it now
  if(C->DetectCyrillic == TRUE && isText == TRUE)
  {
    if(*srcCodeset == NULL || ((*srcCodeset)->name != NULL && stricmp((*srcCodeset)->name, "utf-8") != 0))
    {
      struct codeset *cs = CodesetsFindBest(CSA_Source,         dptr,
                                            CSA_SourceLen,      todo,
                                            CSA_CodesetFamily,  CSV_CodesetFamily_Cyrillic,
                                            TAG_DONE);

      if(cs != NULL && cs != *srcCodeset)
      {
        D(DBF_MIME, "using codeset '%s' instead of '%s'", cs->name, *srcCodeset != NULL ? (*srcCodeset)->name : "none");
        *srcCodeset = cs;
      }
    }
  }

  // if a source codeset is known we have to convert the data to UTF8
  // before writing it out, but we must not touch binary/non-text data
  if(isText == TRUE && *srcCodeset != NULL && stricmp((*srcCodeset)->name, "utf-8") != 0)
  {
    ULONG strLen = 0;

    UTF8 *str = CodesetsUTF8Create(CSA_Source,          dptr,
                                   CSA_SourceLen,       todo,
                                   CSA_SourceCodeset,   *srcCodeset,
                                   CSA_DestLenPtr,      &strLen,
                                   TAG_DONE);

    if(str != NULL && strLen > 0)
    {
      // from now on we write the converted copy instead of the raw data
      dptr = (unsigned char *)str;
      todo = strLen;
    }
    else
    {
      W(DBF_MIME, "error while trying to convert uudecoded string to UTF8");

      // an empty result is of no use, but it still must be freed
      if(str != NULL)
        CodesetsFreeA(str, NULL);
    }
  }

  // now we do a binary write of the data
  if(fwrite(dptr, 1, todo, out) != todo)
  {
    E(DBF_MIME, "error on writing data!");

    result = -1;
  }

  // in case the dptr buffer was allocated by codesets.library,
  // we have to free it now
  if(dptr != buffer)
    CodesetsFreeA(dptr, NULL);

  RETURN(result);
  return result;
}

///
/// uudecode_file()
// Decodes a UUencoded file using separate input/output buffers to speed up
// processing. Everything up to and including the "begin " line is skipped,
// then the data lines are decoded until the zero length line is found, which
// must be followed by "end". Optional per-line checksum chars are verified.
// For text data (isText == TRUE) the decoded bytes are converted from
// 'srcCodeset' to UTF8 before they are written to 'out'.
// Returns the number of decoded bytes on success or one of these codes:
//   -1  read/write error
//   -2  no "begin " line found
//   -3  invalid length ID found
//   -4  a line checksum did not match
//   -5  corrupted line (neither newline nor checksum after the data)
//   -6  no "end" line found after the data
// The codes -3, -4 and -6 are warnings, i.e. decoding was continued and
// the decoded data has been written nevertheless.
long uudecode_file(FILE *in, FILE *out, struct codeset *srcCodeset, BOOL isText)
{
  unsigned char inbuffer[UUDEC_IBUF+1]; // we read the encoded data in ~6300 byte chunks
  // The fill level of the output buffer is checked after each pass over a line
  // only, and a single line may add up to UUMAX_CHAR-1 bytes. Hence we need
  // that much headroom beyond the flush threshold of UUDEC_OBUF bytes.
  unsigned char outbuffer[UUDEC_OBUF+UUMAX_CHAR];
  unsigned char *iptr;
  unsigned char *optr = outbuffer;
  size_t read;
  size_t next_unget = 0;
  long decoded = 0;
  int line_len = 0;
  int result = 0;
  int checksum = 0;
  BOOL eof_reached = FALSE;

  ENTER();

  D(DBF_MIME, "codeset '%s'", srcCodeset != NULL ? srcCodeset->name : "none");

  // before we start with our decoding we have to search for
  // the starting "begin XXX" line
  do
  {
    if(fgets((char *)inbuffer, UUDEC_IBUF, in) != NULL)
    {
      // check if this line start with "begin " and if so
      // break out and continue decoding the real data
      if(strncmp((char *)inbuffer, "begin ", 6) == 0)
        break;
    }
    else if(feof(in) != 0)
    {
      RETURN(-2);
      return -2; // -2 means "no UUcode start found"
    }
    else
    {
      RETURN(-1);
      return -1; // -1 means unexpected error
    }
  }
  while(TRUE);

  // start decoding the "real" data.
  while(eof_reached == FALSE)
  {
    // do a binary read which fills the buffer behind the bytes
    // that were left over from the previous iteration
    read = fread(&inbuffer[next_unget], sizeof(char), UUDEC_IBUF-next_unget, in);

    // on a short item count we check for a potential
    // error and return immediately.
    if(read != UUDEC_IBUF-next_unget)
    {
      if(feof(in) != 0)
      {
        D(DBF_MIME, "EOF file at %ld", ftell(in));

        eof_reached = TRUE; // we found an EOF

        // if there is nothing at all to process we can exit immediately
        if(read == 0 && next_unget == 0)
          break;
      }
      else
      {
        E(DBF_MIME, "error on reading data!");

        // an error occurred, lets return -1
        RETURN(-1);
        return -1;
      }
    }

    // increase/reset the counters
    read += next_unget;
    next_unget = 0;

    // now that we have read in our buffer we have to parse through
    // it and decode all chars according to the uudecoding rules
    // of the UUcode encoding.
    iptr = inbuffer;

    while(read)
    {
      if(line_len == 0)
      {
        // we are at the start of a line, so the next char
        // tells us how many bytes this line is going to carry
        unsigned char c = *iptr++;
        read--;

        // skip whitespaces on a fresh line
        while(is_lwsp(c) && read)
        {
          c = *iptr++;
          read--;
        }

        line_len = UUDECODE_CHAR(c);
        if(line_len == 0)
        {
          size_t marker = 0;

          // To judge a zero length we have to look at up to two following
          // chars. If they are not in the buffer yet we put the length char
          // back and refill the buffer instead of inspecting stale bytes.
          if(read < 2 && eof_reached == FALSE)
          {
            iptr--;
            read++;

            next_unget = read;
            memmove(inbuffer, iptr, next_unget);

            break;
          }

          // lets check whether this is just the sign that we
          // are at the end of our data or if it is an error
          if(read >= 1 && *iptr == '\n')
            marker = 1;
          else if(read >= 2 && *iptr == '`' && *(iptr+1) == '\n')
            marker = 2;

          if(marker > 0)
          {
            // ok, we seem to have found the ending line of the UUcode
            // data, so lets check if we have the finalizing "end"
            iptr += marker;
            read -= marker;

            // check if there is enough data left for the "end"
            if(read < 3 && eof_reached == FALSE)
            {
              // move the rest of the stuff to the start of the buffer
              memmove(inbuffer, iptr, read);

              // do a small binary read
              read += fread(&inbuffer[read], sizeof(char), 3-read, in);

              iptr = inbuffer;
            }

            // check again
            if(read < 3 || strncmp((char *)iptr, "end", 3) != 0)
            {
              // if we end up here then there isn't enough
              // data left for checking the finalizing "end"
              // or we just didn't find it, but somehow we were
              // able to decode all our data, so lets just drop
              // the user a warning
              result = -6; // -6 means "no end tag"
            }

            // set eof to let the outer loop terminate
            eof_reached = TRUE;
            break;
          }
          else
          {
            E(DBF_MIME, "error: invalid length ID");

            result = -3; // -3 means "invalid length ID"
          }
        }

        // clear our checksum
        checksum = 0;
      }
      else
      {
        while(line_len && read/4 > 0)
        {
          int tempsum;
          unsigned char c;
          unsigned char c1;
          unsigned char c2;
          unsigned char c3;
          unsigned char c4;

          // The last group of a line is decoded only if the chars that
          // terminate the line (checksum and/or newline) are in the buffer
          // as well. Otherwise the group is kept for the next iteration,
          // unless there is nothing more to come.
          if(line_len <= 3 && read < 4+2 && eof_reached == FALSE)
            break;

          c1 = UUDECODE_CHAR(*iptr++);
          c2 = UUDECODE_CHAR(*iptr++);
          c3 = UUDECODE_CHAR(*iptr++);
          c4 = UUDECODE_CHAR(*iptr++);

          // now that we have our input chars we can
          // decode them to our output buffer directly
          tempsum = c = c1 << 2 | c2 >> 4;
          *optr++ = c;
          line_len--;
          decoded++;

          if(line_len)
          {
            tempsum += c = c2 << 4 | c3 >> 2;
            *optr++ = c;
            line_len--;
            decoded++;

            if(line_len)
            {
              tempsum += c = c3 << 6 | c4;
              *optr++ = c;
              line_len--;
              decoded++;
            }
          }

          // calculate the checksum as well
          checksum += tempsum % UUMAX_CHAR;

          // decrease the read counter
          read -= 4;
        }

        // let us now check if our outbuffer is filled up so that we can write
        // out the data to our out stream.
        if(optr-outbuffer >= UUDEC_OBUF)
        {
          if(uudecode_flush(outbuffer, optr-outbuffer, out, &srcCodeset, isText) != 0)
          {
            // an error must have occurred.
            RETURN(-1);
            return -1;
          }

          // now reset the outbuffer and stuff
          optr = outbuffer;
        }

        if(line_len == 0)
        {
          // We decoded the complete line, so the next char must be either
          // a newline or the checksum char followed by a newline. There can
          // be less than two chars left only if the file ends here, in
          // which case the missing chars are tolerated.
          if(read > 0)
          {
            unsigned char last = *iptr;

            if(last == '\n')
            {
              // there seems to be no checksum on this line
              // so lets go on without checking it.
              iptr++;
              read--;
            }
            else if(last > ' ' && last <= '`' &&
                    (read == 1 || *(iptr+1) == '\n'))
            {
              // check if our calculated checksum is
              // identical to the last char found
              if(UUENCODE_CHAR(checksum) != last)
              {
                E(DBF_MIME, "wrong checksum: %ld:%ld != %ld", checksum, UUENCODE_CHAR(checksum), last);

                // the checksum seems to be wrong
                // so lets signal it on exiting this
                // function
                result = -4; // -4 means "wrong checksum"
              }

              // skip the checksum and the newline, if there is one
              if(read == 1)
              {
                iptr++;
                read--;
              }
              else
              {
                iptr += 2;
                read -= 2;
              }
            }
            else
            {
              E(DBF_MIME, "error: no newline or no checksum found at end");

              // something serious must have happened
              // as either the last char isn't a newline
              // or a checksum is wrong, so lets exit with
              // an error immediately
              RETURN(-5);
              return -5; // -5 means corrupted UUcode string found
            }
          }
        }
        else if(read > 0)
        {
          // ok, there isn't enough stuff in the input buffer
          // so we break out here and parse the stuff on
          // the next iteration. The regions may overlap, hence
          // memmove() must be used.
          next_unget = read;
          memmove(inbuffer, iptr, next_unget);

          break;
        }
      }
    }
  }

  // check if there is something in the outbuffer that
  // hasn't been written out yet
  if(optr-outbuffer > 0)
  {
    if(uudecode_flush(outbuffer, optr-outbuffer, out, &srcCodeset, isText) != 0)
    {
      // an error must have occurred.
      RETURN(-1);
      return -1;
    }
  }

  // on success lets return the number of decoded
  // chars
  RETURN(result == 0 ? decoded : result);
  return result == 0 ? decoded : result;
}

///