src/mime/qprintable.c from jens-maus/yam

src/mime/qprintable.c
Summary

Maintainability

Test Coverage

Issues
/***************************************************************************

 YAM - Yet Another Mailer
 Copyright (C) 1995-2000 Marcel Beck
 Copyright (C) 2000-2022 YAM Open Source Team

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 YAM Official Support Site :  http://www.yam.ch
 YAM OpenSource project    :  http://sourceforge.net/projects/yamos/

 $Id$

***************************************************************************/

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include <proto/exec.h>
#include <proto/codesets.h>

#include "YAM.h"

#include "mime/qprintable.h"

#include "Config.h"

#include "Debug.h"

// Global variables

// some defines that can be usefull
#define QP_LINELEN  76    // number of chars before qpencode_file() issues a CRLF
#define QPENC_BUF   4096  // bytes to use as a quoted-printable file encoding buffer
#define QPDEC_BUF   4096  // bytes to use as a quoted-printable file decoding buffer

/*** Quoted-Printable encode/decode routines (RFC 2045) ***/
/// qpencode_file()
// Encodes a whole file using the quoted-printable format defined in
// RFC 2045 (page 19)
long qpencode_file(FILE *in, FILE *out)
{
  unsigned char inbuffer[QPENC_BUF+1]; // we read out data in ~4096 byte chunks
  unsigned char outbuffer[QPENC_BUF+5];// the output buffer should not be more than
                                       // the input buffer with an additional space
                                       // for 5 chars which could be used during
                                       // encoding.

  unsigned char *iptr;
  unsigned char *optr = outbuffer;
  unsigned char c;
  int last = -1;
  long encoded_chars = 0;
  int line_len = 0;
  BOOL eof_reached = FALSE;
  size_t read;

  ENTER();

  while(eof_reached == FALSE)
  {
    // read in 4096 byte chunks
    read = fread(inbuffer, 1, QPENC_BUF, in);

    // on a short item count we check for a potential
    // error and return immediatly.
    if(read != QPENC_BUF)
    {
      if(feof(in) != 0)
      {
        D(DBF_MIME, "EOF file at %ld", ftell(in));

        eof_reached = TRUE; // we found an EOF

        // if the last read was zero we can exit immediatly
        if(read == 0)
          break;
      }
      else
      {
        E(DBF_MIME, "error on reading data!");

        // an error occurred, lets return -1
        RETURN(-1);
        return -1;
      }
    }

    // let us now parse through the inbuffer and encode it according
    // to RFC 2045
    iptr = inbuffer;

    while(read)
    {
      // decrease the read number and increase
      // out input buffer pointer
      c = *iptr++;
      read--;

      if(c == '\n')
      {
        // check if the previous char is a linear whitespace and
        // if so we have to put a soft break right before the
        // newline
        if(last != -1 && (last == ' ' || last == '\t'))
        {
          *optr++ = '=';
          *optr++ = '\n';
        }

        *optr++ = '\n';

        // reset the line_len counter
        line_len = 0;
        last = -1;
      }
        // we encode the current char if:
        // 1) it is an unsafe safe
        // 2) it is an upcoming "From " at the start of a line
      else if(!is_qpsafe(c) ||
              (last == -1 && c == 'F' && strncmp((char *)iptr, "rom ", 4) == 0))
      {

        // before we can encode the data we have to check
        // whether there is enough space left on the line
        // or if we have to put it on the next line
        if(line_len+3 >= QP_LINELEN-1) // one space for the trailing '='
        {
          *optr++ = '=';
          *optr++ = '\n';

          // reset the line_len counter
          line_len = 0;
        }

        // now put out the encoded char
        *optr++ = '=';
        *optr++ = basis_hex[(c >> 4) & 0xF];
        *optr++ = basis_hex[c & 0xF];

        // increase the line_len counter
        line_len += 3;

        // count the number of encoded chars
        encoded_chars++;
      }
      else
      {
        // so this char seems to be safe to be directly placed
        // in the output buffer without any encoding. We just
        // have to check whether this line is going to be longer
        // than the limit
        if(line_len+1 >= QP_LINELEN-1) // one space for the trailing '='
        {
          *optr++ = '=';
          *optr++ = '\n';

          // reset the line_len counter
          line_len = 0;
        }

        *optr++ = c;

        // increase the line_len counter
        line_len++;
      }

      // let us now check if our outbuffer is filled up so that we can write
      // out the data to our out stream.
      if(optr-outbuffer >= QPENC_BUF)
      {
        size_t todo = optr-outbuffer;

        // now we do a binary write of the data
        if(fwrite(outbuffer, 1, todo, out) != todo)
        {
          E(DBF_MIME, "error on writing data!");

          // an error must have occurred.
          RETURN(-1);
          return -1;
        }

        // now reset the outbuffer and stuff
        optr = outbuffer;
      }

      last = c;
    }
  }

  // check if there is something in the outbuffer that
  // hasn't been written out yet
  if(optr-outbuffer > 0)
  {
    size_t todo = optr-outbuffer;

    // now we do a binary write of the data
    if(fwrite(outbuffer, 1, todo, out) != todo)
    {
      E(DBF_MIME, "error on writing data!");

      // an error must have occurred.
      RETURN(-1);
      return -1;
    }
  }

  RETURN(encoded_chars);
  return encoded_chars;
}

///
/// qpdecode_file()
// Decodes a whole file using the quoted-printable format defined in
// RFC 2045 (page 19)
long qpdecode_file(FILE *in, FILE *out, struct codeset *srcCodeset, BOOL isText)
{
  unsigned char inbuffer[QPDEC_BUF+1]; // lets use a 4096 byte large input buffer
  unsigned char outbuffer[QPDEC_BUF+1];// to speed things up we use the same amount
                                       // of memory for an output buffer as the
                                       // decoded string can't be larger than
                                       // the encoded one.

  unsigned char *iptr;
  unsigned char *optr = outbuffer;
  unsigned char c;
  size_t read = 0;
  size_t next_unget = 0;
  long decoded = 0;
  int result = 0;
  BOOL eof_reached = FALSE;

  ENTER();

  D(DBF_MIME, "codeset '%s'", srcCodeset != NULL ? srcCodeset->name : "none");

  while(eof_reached == FALSE)
  {
    // do a binary read of ~4096 chunks
    read = fread(&inbuffer[next_unget], sizeof(char), QPDEC_BUF-next_unget, in);

    // on a short item count we check for a potential
    // error and return immediatly.
    if(read != QPDEC_BUF-next_unget)
    {
      if(feof(in) != 0)
      {
        D(DBF_MIME, "EOF file at %ld", ftell(in));

        eof_reached = TRUE; // we found an EOF

        // if the last read was zero we can exit immediatly
        if(read == 0 && next_unget == 0)
          break;
      }
      else
      {
        E(DBF_MIME, "error on reading data!");

        // an error occurred, lets return -1
        RETURN(-1);
        return -1;
      }
    }

    // increase/reset the counters
    read += next_unget;
    next_unget = 0;

    // now that we have read in our buffer we have to parse through
    // it and decode eventually existing quoted printable encoded
    // chunks. The routines also analyze the data and returns an
    // error if non quoted-printable safe data is found, however
    // it still tries to decode the data until the end. This fail-safe
    // behaviour is suggested in RFC 2045 on page 22.
    iptr = inbuffer;

    while(read != 0)
    {
      c = *iptr++;
      read--;

      if(c == '=')
      {
        // check if the next char is a newline so that
        // we can skip the current =
        if(read != 0 && *iptr == '\n')
        {
          // skip the newline..
          iptr++;
          read--;

          continue;
        }

        // a '=' is the sign that a encoded string is following, so
        // let us check if we have enough space in our input buffer
        // and then decode it accordingly
        if(read >= 2)
        {
          unsigned char c1 = hexchar(*iptr);
          unsigned char c2 = hexchar(*(iptr+1));

          // so we have enough space, lets decode it, but let us
          // check if the two chars are really hexadecimal chars
          if(c1 != 255 && c2 != 255)
          {
            *optr++ = c1<<4 | c2;

            // increase the counters
            iptr += 2;
            read -= 2;

            // count the decoded chars
            decoded++;
          }
          else
          {
            // as suggested by RFC 2045 we keep the =XX sequence
            // and report a warning later to the user
            *optr++ = c;
            *optr++ = *iptr++;
            *optr++ = *iptr++;
            read -= 2;
            result = -3; // indicate a "decoding warning"
          }
        }
        else
        {
          // ok, there isn't enough space in the input buffer
          // so we break out here and parse the stuff on
          // the next iteration
          next_unget = read+1;
          memcpy(inbuffer, iptr-1, next_unget);

          break;
        }
      }
      else if(!isascii(c) ||
              (is_ctrl(c) && c != '\t' && c != '\n' &&
               c == '\r' && *iptr != '\n'))
      {
        // we found some not allowed char, so lets ignore it
        // but warn the user
        W(DBF_MIME, "nonallowed character '%lc' (%02lx) found", c, c);
        result = -4; // indicate a "unallowed control chars" warning
      }
      else
      {
        // the current char seems to be a normal
        // char, so lets output it
        *optr++ = c;
      }

      // let us now check if our outbuffer is filled up so that we can write
      // out the data to our out stream.
      if(optr-outbuffer >= QPDEC_BUF)
      {
        unsigned char *dptr = outbuffer;
        size_t todo = optr-outbuffer;

        // in case the user wants us to detect the correct cyrillic codeset
        // we do it now
        if(C->DetectCyrillic == TRUE && isText == TRUE)
        {
          if(srcCodeset == NULL || (srcCodeset->name != NULL && stricmp(srcCodeset->name, "utf-8") != 0))
          {
            struct codeset *cs = CodesetsFindBest(CSA_Source,         dptr,
                                                  CSA_SourceLen,      todo,
                                                  CSA_CodesetFamily,  CSV_CodesetFamily_Cyrillic,
                                                  TAG_DONE);

            if(cs != NULL && cs != srcCodeset)
            {
              D(DBF_MIME, "using codeset '%s' instead of '%s'", srcCodeset != NULL ? srcCodeset->name : "none", cs->name);
              srcCodeset = cs;
            }
          }
        }

        // if the caller supplied a source codeset, we have to
        // make sure we convert our outbuffer before writing it out
        // to the file in UTF8, but we must not touch binary/non-text data
        if(isText == TRUE && srcCodeset != NULL && stricmp(srcCodeset->name, "utf-8") != 0)
        {
          ULONG strLen = 0;

          UTF8 *str = CodesetsUTF8Create(CSA_Source,          dptr,
                                         CSA_SourceLen,       todo,
                                         CSA_SourceCodeset,   srcCodeset,
                                         CSA_DestLenPtr,      &strLen,
                                         TAG_DONE);

          if(str != NULL && strLen > 0)
          {
            // if we end up here we successfully converted the
            // sourcebuffer to a destination buffer which complies to our local
            // charset
            dptr = (unsigned char *)str;
            todo = strLen;
          }
          else
            W(DBF_MIME, "error while trying to convert qpdecoded string to UTF8");
        }

        // now we do a binary write of the data
        if(fwrite(dptr, 1, todo, out) != todo)
        {
          E(DBF_MIME, "error on writing data!");

          // in case the dptr buffer was allocated by codesets.library,
          // we have to free it now
          if(dptr != outbuffer)
            CodesetsFreeA(dptr, NULL);

          // an error must have occurred.
          RETURN(-1);
          return -1;
        }

        // in case the dptr buffer was allocated by codesets.library,
        // we have to free it now
        if(dptr != outbuffer)
          CodesetsFreeA(dptr, NULL);

        // now reset the outbuffer and stuff
        optr = outbuffer;
      }
    }
  }

  // check if there is something in the outbuffer that
  // hasn't been written out yet
  if(optr-outbuffer > 0)
  {
    unsigned char *dptr = outbuffer;
    size_t todo = optr-outbuffer;

    // in case the user wants us to detect the correct cyrillic codeset
    // we do it now
    if(C->DetectCyrillic == TRUE && isText == TRUE)
    {
      if(srcCodeset == NULL || (srcCodeset->name != NULL && stricmp(srcCodeset->name, "utf-8") != 0))
      {
        struct codeset *cs = CodesetsFindBest(CSA_Source,         dptr,
                                              CSA_SourceLen,      todo,
                                              CSA_CodesetFamily,  CSV_CodesetFamily_Cyrillic,
                                              TAG_DONE);

        if(cs != NULL && cs != srcCodeset)
        {
          D(DBF_MIME, "using codeset '%s' instead of '%s'", srcCodeset != NULL ? srcCodeset->name : "none", cs->name);
          srcCodeset = cs;
        }
      }
    }

    // if the caller supplied a source codeset, we have to
    // make sure we convert our outbuffer before writing it out
    // to the file in UTF8, but we must not touch binary/non-text data
    if(isText == TRUE && srcCodeset != NULL && stricmp(srcCodeset->name, "utf-8") != 0)
    {
      ULONG strLen = 0;

      UTF8 *str = CodesetsUTF8Create(CSA_Source,          dptr,
                                     CSA_SourceLen,       todo,
                                     CSA_SourceCodeset,   srcCodeset,
                                     CSA_DestLenPtr,      &strLen,
                                     TAG_DONE);

      if(str != NULL && strLen > 0)
      {
        // if we end up here we successfully converted the
        // sourcebuffer to a destination buffer which complies to our local
        // charset
        dptr = (unsigned char *)str;
        todo = strLen;
      }
      else
        W(DBF_MIME, "error while trying to convert qpdecoded string to UTF8");
    }

    // now we do a binary write of the data
    if(fwrite(dptr, 1, todo, out) != todo)
    {
      E(DBF_MIME, "error on writing data!");

      // in case the dptr buffer was allocated by codesets.library,
      // we have to free it now
      if(dptr != outbuffer)
        CodesetsFreeA(dptr, NULL);

      // an error must have occurred.
      RETURN(-1);
      return -1;
    }

    // in case the dptr buffer was allocated by codesets.library,
    // we have to free it now
    if(dptr != outbuffer)
      CodesetsFreeA(dptr, NULL);
  }

  // if we end up here and read > 0 then the decoding wasn't finished
  // and we have to return an error
  if(read > 0)
  {
    RETURN(-2);
    return -2; // -2 means "unfinished decoding"
  }

  // on success lets return the number of decoded
  // chars
  RETURN(result == 0 ? decoded : result);
  return result == 0 ? decoded : result;
}

///