decode/MailParser.php
<?php
namespace Mail;
use \Mail_mimeDecode;
/**
 * Parses a raw RFC 822 style email message.
 *
 * Two independent code paths live in this class:
 *  - parse() / getBody() / getHeader() & friends work directly on the raw
 *    text with a small line-based scanner and need no external library;
 *  - decode() / getAttachments() delegate MIME decoding to PEAR's
 *    Mail_mimeDecode and collect (or save) the attachments it finds.
 */
class MailParser
{
    /**
     * MIME types accepted as attachments (lower-case "type/subtype").
     *
     * @var array
     */
    protected $allowedMimeTypes = [
        'audio/wave',
        'audio/caf',
        'application/pdf',
        'application/zip',
        'application/octet-stream',
        'image/jpeg',
        'image/png',
        'image/gif'
    ];

    /**
     * MIME types rejected even when they are also in the allowed list.
     *
     * @var array
     */
    protected $disallowedMimeTypes = [];

    /**
     * Charset used to scrub decoded text and attachment names.
     *
     * @var string
     */
    protected $charset = 'UTF-8';

    /**
     * When true, decodePart() prints every MIME type it encounters.
     *
     * @var bool
     */
    protected $debug = false;

    /**
     * raw mail split up by header field (lower-cased name => value, or a
     * list of values when the header occurs more than once)
     *
     * @var array
     */
    protected $rawFields = [];

    /**
     * Body as array of strings, each element is a line
     *
     * @var array
     */
    protected $rawBodyLines = [];

    /**
     * The raw message text handed to parse().
     *
     * @var string|null
     */
    protected $raw = null;

    /**
     * Result of the last Mail_mimeDecode::decode() call.
     *
     * @var mixed
     */
    protected $decoded = null;

    /**
     * Concatenated text/plain parts collected by decode().
     *
     * @var string
     */
    protected $body = "";

    /**
     * Concatenated text/html parts collected by decode().
     *
     * @var string
     */
    protected $html = "";

    /**
     * Attachments Array
     *
     * @var array
     */
    protected $attachments = [];

    /**
     * A safe place for files.
     *
     * @var string
     */
    protected $attachment_directory;

    /**
     * @param string|null $raw optional raw message; parsed immediately when non-empty
     */
    public function __construct($raw = null)
    {
        if (!empty($raw)) {
            $this->parse($raw);
        }
    }

    /**
     * Load a raw message and split it into header fields and body lines.
     *
     * Passing null re-parses the message that is already loaded (if any).
     *
     * @param string|null $raw the raw message text
     * @return $this always, so calls can be chained even when nothing is loaded
     */
    public function parse($raw)
    {
        if ($raw !== null) {
            $this->raw = $raw;
        }
        if ($this->raw !== null) {
            $this->extractHeadersAndBody();
        }
        return $this;
    }

    /**
     * Decode the loaded message with Mail_mimeDecode and collect attachments.
     *
     * Every call starts from a clean slate, so calling decode() (or
     * getAttachments()) repeatedly does not duplicate attachments or text.
     *
     * @param string|null $attachment_directory where to save files; falls back to
     *        the previously used directory, then to the system temp directory
     * @param bool $saving true to write attachments to disk, false to keep
     *        their content in memory
     * @return array list of attachment descriptors; empty when no message is loaded
     * @throws \RuntimeException if $saving is true and the directory or a file
     *         cannot be created
     */
    public function decode($attachment_directory = null, $saving = true)
    {
        if ($this->raw === null) {
            return [];
        }

        // results of an earlier decode() must not leak into this one
        $this->attachments = [];
        $this->body = '';
        $this->html = '';

        if (!empty($attachment_directory)) {
            $this->attachment_directory = $attachment_directory;
        }
        if ($saving) {
            // the file system is only touched when files are really written
            $this->prepareAttachmentDirectory();
        }

        // http://pear.php.net/manual/en/package.mail.mail-mimedecode.decode.php
        $decoder = new Mail_mimeDecode($this->raw);
        $this->decoded = $decoder->decode([
            'decode_headers' => true,
            'include_bodies' => true,
            'decode_bodies' => true
        ]);

        if (isset($this->decoded->parts) && \is_array($this->decoded->parts)) {
            foreach ($this->decoded->parts as $body_part) {
                $this->decodePart($body_part, $saving);
            }
        }

        if (isset($this->decoded->disposition) && $this->decoded->disposition == 'inline') {
            $mime_type = $this->mimeTypeOf($this->decoded);
            $filename = 'file';
            if (isset($this->decoded->d_parameters)
                && \is_array($this->decoded->d_parameters)
                && \array_key_exists('filename', $this->decoded->d_parameters)
            ) {
                $filename = $this->decoded->d_parameters['filename'];
            }
            if ($this->isValidAttachment($mime_type)) {
                $content = isset($this->decoded->body) ? $this->decoded->body : '';
                $this->saveAttachment($filename, $content, $mime_type, $saving);
            }
            $this->body = "";
        }

        // We might also have uuencoded files. Check for those.
        if (empty($this->body)) {
            $this->body = isset($this->decoded->body) ? $this->decoded->body : "";
        }
        $uuPattern = "/begin ([0-7]{3}) (.+)\r?\n(.+)\r?\nend/Us";
        if (\preg_match($uuPattern, $this->body) > 0) {
            foreach ($decoder->uudecode($this->body) as $file) {
                // uuencoded files carry no MIME type, so they bypass the allow list
                $this->saveAttachment($file['filename'], $file['filedata'], 'unknown', $saving);
            }
            // Strip out all the uuencoded attachments from the body
            while (\preg_match($uuPattern, $this->body) > 0) {
                $this->body = \preg_replace($uuPattern, "\n", $this->body);
            }
        }

        // converting a charset to itself drops byte sequences invalid in it
        $this->body = \mb_convert_encoding($this->body, $this->charset, $this->charset);

        return $this->attachments;
    }

    /**
     * Detect and return an array of attachments and their data, without
     * writing anything to disk.
     * ```
     * Array([0] =>
     *  Array(
     *      [type] => audio/caf
     *      [name] => example_vmr_09102012182307.3gp
     *      [content] => ...
     *  ), Array(...)
     * )
     * ```
     * JSON format.
     * ```
     * [{
     *  "type": "text/plain",
     *  "name": "text.txt",
     *  "content": "..."
     * }, {}]
     * ```
     * The content is passed on exactly as decode() collected it; json_encode()
     * fails and returns false when it is not valid UTF-8.
     *
     * @param bool $json return data in JSON format
     * @return array|string|bool array, JSON string, or false if JSON encoding fails
     */
    public function getAttachments($json = false)
    {
        $decoded = $this->decode(null, false);
        return ($json) ? \json_encode($decoded) : $decoded;
    }

    /**
     * Tell whether a line holds nothing but line-break characters.
     *
     * @param string $line
     * @return boolean
     */
    private function isNewLine($line)
    {
        return \str_replace(["\r", "\n"], '', (string) $line) === '';
    }

    /**
     * Split the raw message into $rawFields (headers) and $rawBodyLines.
     *
     * Headers end at the first empty line; that separator line itself belongs
     * to neither the headers nor the body.
     */
    private function extractHeadersAndBody()
    {
        // start clean so a second parse() does not inherit old headers
        $this->rawFields = [];
        $this->rawBodyLines = [];

        $lines = \preg_split("/(\r?\n|\r)/", (string) $this->raw);
        $currentHeader = '';

        foreach ($lines as $i => $line) {
            if ($this->isNewLine($line)) {
                // end of headers
                $this->rawBodyLines = \array_slice($lines, $i + 1);
                break;
            }

            // check is line starting With printable character
            if (\preg_match('/^[A-Za-z]/', $line)) {
                if (!\preg_match('/^([^:]+): ?(.*)$/', $line, $matches)) {
                    // no colon: not a header field, and nothing to continue
                    $currentHeader = '';
                    continue;
                }
                // start of new header
                $newHeader = \strtolower(\trim($matches[1]));
                $value = $matches[2];
                if (!isset($this->rawFields[$newHeader])) {
                    $this->rawFields[$newHeader] = $value;
                } elseif (\is_array($this->rawFields[$newHeader])) {
                    $this->rawFields[$newHeader][] = $value;
                } else {
                    // second occurrence: promote the scalar to a list
                    $this->rawFields[$newHeader] = [$this->rawFields[$newHeader], $value];
                }
                $currentHeader = $newHeader;
                continue;
            }

            // more lines related to the current header
            if ($currentHeader === '') {
                continue;
            }
            $continuation = \trim($line);
            if ($continuation === '') {
                continue;
            }
            // unfolding: the line break plus indentation stands for one space
            if (\is_array($this->rawFields[$currentHeader])) {
                $last = \count($this->rawFields[$currentHeader]) - 1;
                $this->rawFields[$currentHeader][$last] .= ' ' . $continuation;
            } else {
                $this->rawFields[$currentHeader] .= ' ' . $continuation;
            }
        }
    }

    /**
     * Add additional allowed mime types to the list.
     *
     * Values without a "/" are ignored.
     *
     * @param string $mime_types a single "type/subtype"
     * @return $this
     */
    public function addMimeType($mime_types = '')
    {
        if (\is_string($mime_types) && \strpos($mime_types, '/') !== false) {
            $mime = \strtolower(\trim($mime_types));
            if (!\in_array($mime, $this->allowedMimeTypes, true)) {
                $this->allowedMimeTypes[] = $mime;
            }
        }
        return $this;
    }

    /**
     * Add additional disallowed mime types to the list.
     *
     * Values without a "/" are ignored.
     *
     * @param string $mime_types a single "type/subtype"
     * @return $this
     */
    public function removeMimeType($mime_types = '')
    {
        if (\is_string($mime_types) && \strpos($mime_types, '/') !== false) {
            $mime = \strtolower(\trim($mime_types));
            if (!\in_array($mime, $this->disallowedMimeTypes, true)) {
                $this->disallowedMimeTypes[] = $mime;
            }
        }
        return $this;
    }

    /**
     * Extract the text/plain (default) or text/html part from the raw body.
     *
     * Example of an email body
     *
     * --0016e65b5ec22721580487cb20fd
     * Content-Type: text/plain; charset=ISO-8859-1
     * Hi all. I am new to Android development.
     * Please help me.
     *
     * --
     * My signature
     * email: myemail@gmail.com
     * web: http://www.example.com
     * --0016e65b5ec22721580487cb20fd
     * Content-Type: text/html; charset=ISO-8859-1
     *
     * When no matching Content-Type line exists in the body, the whole body is
     * returned, decoded according to the message's own Content-Type and
     * Content-Transfer-Encoding headers.
     *
     * @param string|null $returnType 'HTML' for the HTML part, anything else for plain text
     * @return string - UTF8 encoded
     */
    public function getBody(?string $returnType = null)
    {
        $body = '';
        $detectedContentType = false;
        $contentTransferEncoding = null;
        $charset = null;
        $waitingForContentStart = true;

        $contentTypeRegex = ($returnType === 'HTML')
            ? '/^Content-Type: ?text\/html/i'
            : '/^Content-Type: ?text\/plain/i';

        // there could be more than one boundary. This also skips the quotes if they are included.
        \preg_match_all(
            '/(*ANYCRLF)boundary=(?:|")([a-zA-Z0-9_=\.\(\)_\/+-]+)(?:|")(?:$|;)/mi',
            (string) $this->raw,
            $matches
        );
        $boundaries = [];
        foreach ($matches[1] as $boundary) {
            // sometimes boundaries are delimited by quotes - we want to remove them
            $boundaries[] = \trim(\str_replace(["'", '"'], '', $boundary));
        }

        foreach ($this->rawBodyLines as $line) {
            if (!$detectedContentType) {
                // Some mailers put Content-Transfer-Encoding before Content-Type,
                // so remember it per part and forget it at every boundary.
                if ($this->isBoundaryLine($line, $boundaries)) {
                    $contentTransferEncoding = null;
                } else {
                    $encoding = $this->extractTransferEncoding($line);
                    if ($encoding !== null) {
                        $contentTransferEncoding = $encoding;
                    }
                }
                if (\preg_match($contentTypeRegex, $line)) {
                    $detectedContentType = true;
                    $charset = $this->extractCharset($line);
                }
            } elseif ($waitingForContentStart) {
                // remaining header lines of the wanted part
                $found = $this->extractCharset($line);
                if ($found !== null) {
                    $charset = $found;
                }
                if ($contentTransferEncoding === null) {
                    $contentTransferEncoding = $this->extractTransferEncoding($line);
                }
                if ($this->isNewLine($line)) {
                    $waitingForContentStart = false;
                }
            } else {
                // collecting the actual content until we find the delimiter
                if ($this->isBoundaryLine($line, $boundaries)) {
                    break;
                }
                $body .= $line . "\n";
            }
        }

        if (!$detectedContentType) {
            // if here, we missed the text/plain content-type (probably it was
            // in the header), thus we assume the whole body is what we are after
            // and take charset and encoding from the message headers
            $body = \implode("\n", $this->rawBodyLines);
            $charset = $this->extractCharset($this->headerAsString('content-type'));
            $headerEncoding = \strtolower(\trim($this->headerAsString('content-transfer-encoding')));
            $contentTransferEncoding = ($headerEncoding === '') ? null : $headerEncoding;
        }

        // removing trailing new lines
        $body = \rtrim($body, "\r\n");

        if ($contentTransferEncoding === 'base64') {
            $body = \base64_decode($body);
        } elseif ($contentTransferEncoding === 'quoted-printable') {
            $body = \quoted_printable_decode($body);
        }

        if ($charset === null || $charset === '') {
            $charset = 'ASCII';
        }
        if ($body !== '' && $charset !== 'UTF-8') {
            // iconv returns FALSE (and raises a notice) on failure; the failure
            // is handled right below, hence the suppression
            $converted = @\iconv($charset, 'UTF-8//TRANSLIT', $body);
            if ($converted === false) {
                // keep text that already is valid UTF-8, otherwise read the
                // bytes as ISO-8859-1 (what utf8_encode() used to do)
                $converted = (\preg_match('//u', $body) === 1)
                    ? $body
                    : \iconv('ISO-8859-1', 'UTF-8', $body);
            }
            $body = $converted;
        }

        return $body;
    }

    /**
     * @return string - UTF8 encoded
     */
    public function getPlain()
    {
        return $this->getBody('PLAIN');
    }

    /**
     * Return the HTML body, cut after the first closing html tag so that
     * anything following the document (e.g. attachments) is omitted.
     *
     * A body without a closing html tag is returned unchanged.
     *
     * @return string - UTF8 encoded
     */
    public function getHtml()
    {
        $body = $this->getBody('HTML');
        $end = \stripos($body, '</html>');
        if ($end === false) {
            return $body;
        }
        return \substr($body, 0, $end + \strlen('</html>'));
    }

    /**
     *
     * @return string
     * @throws \Exception if a to header is not found or if there are no recipient
     */
    public function getTo()
    {
        return $this->decodedHeaderOrFail('to', "Couldn't find the recipients of the email");
    }

    /**
     *
     * @return string (in UTF-8 format)
     * @throws \Exception if a subject header is not found
     */
    public function getSubject()
    {
        return $this->decodedHeaderOrFail('subject', "Couldn't find the subject of the email");
    }

    /**
     * N.B.: if the header doesn't exist an empty string is returned
     *
     * @param string $headerName - the header we want to retrieve (case-insensitive)
     * @return string|array - the value of the header; a list of values when
     *         the header occurs more than once in the message
     */
    public function getHeader(string $headerName)
    {
        $headerName = \strtolower($headerName);
        if (isset($this->rawFields[$headerName])) {
            return $this->rawFields[$headerName];
        }
        return '';
    }

    /**
     * The parsed headers as associative array
     *
     * @return array
     */
    public function getHeaders()
    {
        return $this->rawFields;
    }

    /**
     * The Cc recipients, split on commas and trimmed.
     *
     * @return array empty when there is no Cc header
     */
    public function getCc()
    {
        return $this->splitAddressList('cc');
    }

    /**
     * The Bcc recipients, split on commas and trimmed.
     *
     * @return array empty when there is no Bcc header
     */
    public function getBcc()
    {
        return $this->splitAddressList('bcc');
    }

    /**
     * Returns the raw value of the Date header
     *
     * @return string|array $date
     */
    public function getDate()
    {
        return $this->getHeader("date");
    }

    /**
     * Returns full sender name and email
     *
     * @return string (in UTF-8 format)
     * @throws \Exception if a from header is not found
     */
    public function getFrom()
    {
        return $this->decodedHeaderOrFail('from', "Couldn't find the sender of the email");
    }

    /**
     * Returns the name of To or From
     *
     * @param string|null $toFrom header to read: "to" or "from"
     * @return string the text in front of "<", or "" when the header holds
     *         only an address or no "<" at all
     * @throws \Exception if $toFrom is empty
     */
    private function getToFromName($toFrom = null)
    {
        if (empty($toFrom)) {
            throw new \Exception("Couldn't find To or From of the email");
        }

        // the returned string is like "John Smith <john.smith@example.com>"
        $strName = \trim($this->headerAsString($toFrom));

        // "<john.smith@example.com>" carries no name: strip < and > and test
        $possibleEmail = \strtr($strName, ['<' => '', '>' => '']);
        if (\filter_var($possibleEmail, \FILTER_VALIDATE_EMAIL)) {
            return "";
        }

        $open = \strpos($strName, '<');
        if ($open === false) {
            // without "<" there is no "name <address>" structure to split
            return "";
        }

        // else, it's only returned what's before the < char
        return $this->decodeMimeString(\rtrim(\substr($strName, 0, $open)));
    }

    /**
     * @return string display name of the sender, "" if there is none
     */
    public function getFromName()
    {
        return $this->getToFromName("from");
    }

    /**
     * @return string display name of the recipient, "" if there is none
     */
    public function getToName()
    {
        return $this->getToFromName("to");
    }

    /**
     * Returns the email of To or From
     *
     * @param string|null $toFrom header to read: "to" or "from"
     * @return string the address, or "" when none can be located
     * @throws \Exception if $toFrom is empty
     */
    private function getToFromEmail($toFrom = null)
    {
        if (empty($toFrom)) {
            throw new \Exception("Couldn't find To or From of the email");
        }

        // the returned string is like "John Smith <john.smith@example.com>"
        $strEmail = \trim($this->headerAsString($toFrom));

        // if the header is just an address (with or without < >), return it
        $possibleEmail = \strtr($strEmail, ['<' => '', '>' => '']);
        if (\filter_var($possibleEmail, \FILTER_VALIDATE_EMAIL)) {
            return $possibleEmail;
        }

        $open = \strpos($strEmail, '<');
        if ($open === false) {
            return "";
        }

        // else it's only returned what's between the first < > pair
        $close = \strpos($strEmail, '>', $open + 1);
        if ($close === false) {
            return \trim(\substr($strEmail, $open + 1));
        }
        return \trim(\substr($strEmail, $open + 1, $close - $open - 1));
    }

    /**
     * @return string address of the sender, "" if it cannot be located
     */
    public function getFromEmail()
    {
        return $this->getToFromEmail('from');
    }

    /**
     * @return string address of the recipient, "" if it cannot be located
     */
    public function getToEmail()
    {
        return $this->getToFromEmail('to');
    }

    /**
     * Decode a single body part of an email message,
     * the body part of the email message, as parsed by Mail_mimeDecode.
     *
     * Recursive if nested body parts are found
     *
     * This is the meat of the script.
     *
     * @param mixed $body_part (required)
     * @param bool $saving passed through to saveAttachment()
     */
    private function decodePart($body_part, $saving = true)
    {
        // first hit wins: Content-Type name, Content-Type filename,
        // Content-Disposition filename
        $filename = 'file';
        $candidates = [
            ['ctype_parameters', 'name'],
            ['ctype_parameters', 'filename'],
            ['d_parameters', 'filename'],
        ];
        foreach ($candidates as list($property, $key)) {
            if (isset($body_part->$property)
                && \is_array($body_part->$property)
                && \array_key_exists($key, $body_part->$property)
            ) {
                $filename = $body_part->{$property}[$key];
                break;
            }
        }

        $mime_type = $this->mimeTypeOf($body_part);
        if ($this->debug) {
            print "Found body part type $mime_type\n";
        }

        if (\strpos($mime_type, 'multipart/') === 0) {
            if (isset($body_part->parts) && \is_array($body_part->parts)) {
                foreach ($body_part->parts as $sub_part) {
                    $this->decodePart($sub_part, $saving);
                }
            }
            return;
        }

        $content = isset($body_part->body) ? $body_part->body : '';
        $isInline = !isset($body_part->disposition)
            || \strtolower($body_part->disposition) === 'inline';

        if ($isInline && $mime_type === 'text/plain') {
            $this->body .= \mb_convert_encoding($content, $this->charset, $this->charset) . "\n";
        } elseif ($isInline && $mime_type === 'text/html') {
            $this->html .= \mb_convert_encoding($content, $this->charset, $this->charset) . "\n";
        } elseif ($this->isValidAttachment($mime_type)) {
            $this->saveAttachment($filename, $content, $mime_type, $saving);
        }
    }

    /**
     * Tell whether a MIME type is on the allowed list and not on the
     * disallowed one. The comparison ignores case.
     *
     * @param string $mime_type
     * @return bool
     */
    private function isValidAttachment($mime_type)
    {
        $mime_type = \strtolower((string) $mime_type);
        return \in_array($mime_type, $this->allowedMimeTypes, true)
            && !\in_array($mime_type, $this->disallowedMimeTypes, true);
    }

    /**
     * Save off a single file
     *
     * On disk the file gets a generated name; only a sanitised extension is
     * taken from the (untrusted) original file name.
     *
     * @param string $filename (required) The filename to use for this file
     * @param mixed $contents (required) The contents of the file we will save
     * @param string $mime_type (required) The mime-type of the file
     * @param bool $saving should we actual save to file system
     * @throws \RuntimeException if the file cannot be created or fully written
     */
    private function saveAttachment($filename, $contents, $mime_type = 'unknown', $saving = true)
    {
        $filename = \mb_convert_encoding($filename, $this->charset, $this->charset);

        if (!$saving) {
            $this->attachments[] = [
                'name' => $filename,
                'content' => $contents,
                'type' => $mime_type
            ];
            return;
        }

        $extension = $this->getFileExtension($filename);
        $suffix = ($extension === '') ? '' : '.' . $extension;

        $outFile = false;
        $path = null;
        for ($attempt = 0; $attempt < 10; $attempt++) {
            $candidate = $this->attachment_directory . \uniqid('attachment_') . $suffix;
            // 'x' fails instead of truncating when the name is already taken;
            // a failure only means "try the next name"
            $handle = @\fopen($candidate, 'xb');
            if ($handle === false) {
                continue;
            }
            if (\flock($handle, \LOCK_EX)) {
                $outFile = $handle;
                $path = $candidate;
                break;
            }
            \fclose($handle);
            @\unlink($candidate);
        }

        if ($outFile === false) {
            throw new \RuntimeException("Couldn't create a file for the attachment in {$this->attachment_directory}");
        }

        $contents = (string) $contents;
        $written = \fwrite($outFile, $contents);
        \flock($outFile, \LOCK_UN);
        \fclose($outFile);

        if ($written === false || $written !== \strlen($contents)) {
            throw new \RuntimeException("Couldn't write the attachment to {$path}");
        }

        $this->attachments[] = [
            'name' => $filename,
            'path' => $path,
            'size' => $this->formatBytes($written),
            'mime' => $mime_type
        ];
    }

    /**
     * Format Bytes into human-friendly sizes
     * with the number of bytes in the largest applicable unit (eg. KB, MB, GB, TB)
     *
     * @param int|float $bytes negative values are treated as 0
     * @param int $precision number of decimals to round to
     * @return string e.g. "1.5 KB"
     */
    private function formatBytes($bytes, $precision = 2)
    {
        $units = ['B', 'KB', 'MB', 'GB', 'TB'];
        $value = (float) \max($bytes, 0);
        $unit = 0;
        $lastUnit = \count($units) - 1;

        // repeated division avoids the rounding surprises of log() ratios
        while ($value >= 1024 && $unit < $lastUnit) {
            $value /= 1024;
            $unit++;
        }

        return \round($value, $precision) . ' ' . $units[$unit];
    }

    /**
     * Extract a safe, lower-cased extension from an untrusted file name.
     *
     * Only the last path component is looked at and only plain
     * alphanumeric extensions are accepted.
     *
     * @param string $filename
     * @return string the extension without the dot, "" if there is no usable one
     */
    private function getFileExtension($filename)
    {
        $base = \basename(\str_replace('\\', '/', (string) $filename));
        $dot = \strrpos($base, '.');
        if ($dot === false) {
            return '';
        }
        $extension = \strtolower((string) \substr($base, $dot + 1));
        return (\preg_match('/^[a-z0-9]{1,16}$/', $extension) === 1) ? $extension : '';
    }

    /**
     * Make sure the attachment directory is set, exists and ends with a
     * directory separator.
     *
     * @throws \RuntimeException if the directory cannot be created
     */
    private function prepareAttachmentDirectory()
    {
        if (empty($this->attachment_directory)) {
            $this->attachment_directory = \sys_get_temp_dir();
        }
        $directory = $this->attachment_directory;

        // a concurrent process may create the directory between the checks
        if (!\is_dir($directory) && !@\mkdir($directory, 0770, true) && !\is_dir($directory)) {
            throw new \RuntimeException("Couldn't create the attachment directory {$directory}");
        }

        // add trailing slash if needed
        $lastChar = \substr($directory, -1);
        if ($lastChar !== '/' && $lastChar !== \DIRECTORY_SEPARATOR) {
            $directory .= \DIRECTORY_SEPARATOR;
        }
        $this->attachment_directory = $directory;
    }

    /**
     * Build the lower-cased "primary/secondary" MIME type of a decoded part.
     *
     * @param mixed $part object produced by Mail_mimeDecode
     * @return string "/" when the part carries no type information
     */
    private function mimeTypeOf($part)
    {
        $primary = isset($part->ctype_primary) ? $part->ctype_primary : '';
        $secondary = isset($part->ctype_secondary) ? $part->ctype_secondary : '';
        return \strtolower("{$primary}/{$secondary}");
    }

    /**
     * Return a header as one string; repeated headers are joined with ", ".
     *
     * @param string $name header name (case-insensitive)
     * @return string "" when the header does not exist
     */
    private function headerAsString($name)
    {
        $value = $this->getHeader($name);
        return \is_array($value) ? \implode(', ', $value) : (string) $value;
    }

    /**
     * Decode MIME encoded-words into UTF-8.
     *
     * @see https://www.php.net/manual/en/function.imap-utf8.php#102081
     * @param string $value
     * @return string the decoded text, or $value itself if decoding fails
     */
    private function decodeMimeString($value)
    {
        // failure is handled below, hence the suppressed notice
        $decoded = @\iconv_mime_decode((string) $value, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, 'UTF-8');
        return ($decoded === false) ? (string) $value : $decoded;
    }

    /**
     * Return a MIME-decoded header or fail when it is absent.
     *
     * @param string $name lower-case header name
     * @param string $message exception message used when the header is missing
     * @return string (in UTF-8 format)
     * @throws \Exception if the header is not found
     */
    private function decodedHeaderOrFail($name, $message)
    {
        if (!isset($this->rawFields[$name])) {
            throw new \Exception($message);
        }
        return $this->decodeMimeString($this->headerAsString($name));
    }

    /**
     * Split an address header on commas.
     *
     * NOTE(review): a comma inside a quoted display name ("Doe, John") is
     * split as well; confirm whether callers need full address parsing.
     *
     * @param string $name lower-case header name
     * @return array trimmed, non-empty entries
     */
    private function splitAddressList($name)
    {
        $value = \trim($this->headerAsString($name));
        if ($value === '') {
            return [];
        }
        $entries = \array_map('trim', \explode(',', $value));
        return \array_values(\array_filter($entries, 'strlen'));
    }

    /**
     * Pull the charset parameter out of a header line.
     *
     * @param string $line
     * @return string|null upper-cased charset, or null if the line has none
     */
    private function extractCharset($line)
    {
        // stop at quote, semicolon or whitespace so that trailing parameters
        // such as "; format=flowed" never end up in the charset name
        if (\preg_match('/charset\s*=\s*"?([^";\s]+)/i', (string) $line, $matches)) {
            return \strtoupper($matches[1]);
        }
        return null;
    }

    /**
     * Read the value of a Content-Transfer-Encoding line.
     *
     * @param string $line
     * @return string|null lower-cased encoding, or null for any other line
     */
    private function extractTransferEncoding($line)
    {
        if (\preg_match('/^Content-Transfer-Encoding: ?(.*)/i', (string) $line, $matches)) {
            $encoding = \strtolower(\trim($matches[1]));
            return ($encoding === '') ? null : $encoding;
        }
        return null;
    }

    /**
     * Tell whether a body line is a MIME delimiter ("--boundary") or a
     * closing delimiter ("--boundary--") of one of the given boundaries.
     *
     * @param string $line
     * @param array $boundaries boundary strings without the leading dashes
     * @return bool
     */
    private function isBoundaryLine($line, array $boundaries)
    {
        $line = (string) $line;
        if ($boundaries === [] || \strncmp($line, '--', 2) !== 0) {
            return false;
        }
        $candidate = \rtrim((string) \substr($line, 2));
        if (\in_array($candidate, $boundaries, true)) {
            return true;
        }
        return \substr($candidate, -2) === '--'
            && \in_array((string) \substr($candidate, 0, -2), $boundaries, true);
    }
}