MAXakaWIZARD/xls-writer

View on GitHub
src/FormulaParser.php

Summary

Maintainability
D
2 days
Test Coverage
<?php

namespace Xls;

class FormulaParser
{
    /**
     * The index of the character we are currently looking at
     * @var integer
     */
    protected $currentChar = 0;

    /**
     * The token we are working on.
     * @var string
     */
    protected $currentToken = '';

    /**
     * The formula to parse
     * @var string
     */
    protected $formula = '';

    /**
     * The character ahead of the current char
     * @var string
     */
    protected $lookahead = '';

    /**
     * The parse tree to be generated
     * @var array
     */
    protected $parseTree = array();

    /**
     * Array of external sheets
     * @var array
     */
    protected $extSheets = array();

    /**
     * Array of sheet references in the form of REF structures
     * @var array
     */
    protected $references = array();

    /**
     * Convert a token to the proper ptg value.
     *
     * @param mixed $token The token to convert.
     * @return string the converted token
     * @throws \Exception
     */
    protected function convert($token)
    {
        if (Token::isString($token)) {
            return $this->convertString($token);
        } elseif (is_numeric($token)) {
            return $this->convertNumber($token);
        } elseif (Token::isReference($token)) {
            return $this->convertRef2d($token);
        } elseif (Token::isExternalReference($token)) {
            return $this->convertRef3d($token);
        } elseif (Token::isRange($token)) {
            return $this->convertRange2d($token);
        } elseif (Token::isExternalRange($token)) {
            return $this->convertRange3d($token);
        } elseif (Ptg::exists($token)) {
            // operators (including parentheses)
            return pack("C", Ptg::get($token));
        } elseif (Token::isArg($token)) {
            // if it's an argument, ignore the token (the argument remains)
            return '';
        }

        throw new \Exception("Unknown token $token");
    }

    /**
     * Convert a number token to ptgInt or ptgNum
     *
     * @param mixed $num an integer or double for conversion to its ptg value
     * @return string
     */
    protected function convertNumber($num)
    {
        if (preg_match("/^\d+$/", $num) && $num <= 65535) {
            // Integer in the range 0..2**16-1
            return pack("Cv", Ptg::get('ptgInt'), $num);
        } else {
            // A float
            return pack("Cd", Ptg::get('ptgNum'), $num);
        }
    }

    /**
     * Convert a string token to ptgStr
     *
     * @param string $string A string for conversion to its ptg value.
     * @throws \Exception
     * @return string the converted token
     */
    protected function convertString($string)
    {
        // chop away beggining and ending quotes
        $string = substr($string, 1, strlen($string) - 2);
        if (strlen($string) > Biff8::MAX_STR_LENGTH) {
            throw new \Exception("String is too long");
        }

        $encoding = 0;

        return pack("CCC", Ptg::get('ptgStr'), strlen($string), $encoding) . $string;
    }

    /**
     * Convert a function to a ptgFunc or ptgFuncVarV depending on the number of
     * args that it takes.
     *
     * @param string $token    The name of the function for convertion to ptg value.
     * @param integer $numArgs The number of arguments the function receives.
     *
     * @return string The packed ptg for the function
     */
    protected function convertFunction($token, $numArgs)
    {
        $ptg = Functions::getPtg($token);
        $args = Functions::getArgsNumber($token);

        // Fixed number of args eg. TIME($i,$j,$k).
        if ($args >= 0) {
            return pack("Cv", Ptg::get('ptgFuncV'), $ptg);
        }

        // Variable number of args eg. SUM($i,$j,$k, ..).
        return pack("CCv", Ptg::get('ptgFuncVarV'), $numArgs, $ptg);
    }

    /**
     * Convert an Excel range such as A1:D4 to a ptgRefV.
     *
     * @param string $range An Excel range in the A1:A2 or A1..A2 format.
     * @return string
     */
    protected function convertRange2d($range)
    {
        $separator = (Token::isRangeWithDots($range)) ? '..' : ':';
        list($cell1, $cell2) = explode($separator, $range);

        // Convert the cell references
        list($row1, $col1) = $this->cellToPackedRowcol($cell1);
        list($row2, $col2) = $this->cellToPackedRowcol($cell2);

        $ptgArea = pack("C", Ptg::get('ptgArea'));

        return $ptgArea . $row1 . $row2 . $col1 . $col2;
    }

    /**
     * Convert an Excel 3d range such as "Sheet1!A1:D4" or "Sheet1:Sheet2!A1:D4" to
     * a ptgArea3d.
     *
     * @param string $token An Excel range in the Sheet1!A1:A2 format.
     * @return string The packed ptgArea3d token
     */
    protected function convertRange3d($token)
    {
        // Split the ref at the ! symbol
        list($extRef, $range) = explode('!', $token);

        // Convert the external reference part
        $extRef = $this->getRefIndex($extRef);

        // Split the range into 2 cell refs
        list($cell1, $cell2) = explode(':', $range);

        // Convert the cell references
        list($row1, $col1) = $this->cellToPackedRowcol($cell1);
        list($row2, $col2) = $this->cellToPackedRowcol($cell2);

        $ptgArea = pack("C", Ptg::get('ptgArea3dA'));

        return $ptgArea . $extRef . $row1 . $row2 . $col1 . $col2;
    }

    /**
     * Convert an Excel reference such as A1, $B2, C$3 or $D$4 to a ptgRefV.
     *
     * @param string $cell An Excel cell reference
     * @return string The cell in packed() format with the corresponding ptg
     */
    protected function convertRef2d($cell)
    {
        list($row, $col) = $this->cellToPackedRowcol($cell);

        $ptgRef = pack("C", Ptg::get('ptgRefA'));

        return $ptgRef . $row . $col;
    }

    /**
     * Convert an Excel 3d reference such as "Sheet1!A1" or "Sheet1:Sheet2!A1" to a
     * ptgRef3d.
     *
     * @param string $cell An Excel cell reference
     * @return string The packed ptgRef3d token
     */
    protected function convertRef3d($cell)
    {
        // Split the ref at the ! symbol
        list($extRef, $cell) = explode('!', $cell);

        // Convert the external reference part
        $extRef = $this->getRefIndex($extRef);

        // Convert the cell reference part
        list($row, $col) = $this->cellToPackedRowcol($cell);

        $ptgRef = pack("C", Ptg::get('ptgRef3dA'));

        return $ptgRef . $extRef . $row . $col;
    }

    /**
     * @param string $str
     *
     * @return string
     */
    protected function removeTrailingQuotes($str)
    {
        $str = preg_replace("/^'/", '', $str); // Remove leading  ' if any.
        $str = preg_replace("/'$/", '', $str); // Remove trailing ' if any.

        return $str;
    }

    /**
     * @param $extRef
     *
     * @return array
     * @throws \Exception
     */
    protected function getRangeSheets($extRef)
    {
        $extRef = $this->removeTrailingQuotes($extRef);

        // Check if there is a sheet range eg., Sheet1:Sheet2.
        if (preg_match("/:/", $extRef)) {
            list($sheetName1, $sheetName2) = explode(':', $extRef);

            $sheet1 = $this->getSheetIndex($sheetName1);
            if ($sheet1 == -1) {
                throw new \Exception("Unknown sheet name $sheetName1 in formula");
            }

            $sheet2 = $this->getSheetIndex($sheetName2);
            if ($sheet2 == -1) {
                throw new \Exception("Unknown sheet name $sheetName2 in formula");
            }

            // Reverse max and min sheet numbers if necessary
            if ($sheet1 > $sheet2) {
                list($sheet1, $sheet2) = array($sheet2, $sheet1);
            }
        } else { // Single sheet name only.
            $sheet1 = $this->getSheetIndex($extRef);
            if ($sheet1 == -1) {
                throw new \Exception("Unknown sheet name $extRef in formula");
            }
            $sheet2 = $sheet1;
        }

        return array($sheet1, $sheet2);
    }

    /**
     * Look up the REF index that corresponds to an external sheet name
     * (or range). If it doesn't exist yet add it to the workbook's references
     * array. It assumes all sheet names given must exist.
     *
     * @param string $extRef The name of the external reference
     *
     * @throws \Exception
     * @return string The reference index in packed() format
     */
    protected function getRefIndex($extRef)
    {
        list($sheet1, $sheet2) = $this->getRangeSheets($extRef);

        $index = $this->addRef($sheet1, $sheet2);

        return pack('v', $index);
    }

    /**
     * Add reference and return its index
     * @param int $sheet1
     * @param int $sheet2
     *
     * @return int
     */
    public function addRef($sheet1, $sheet2)
    {
        // assume all references belong to this document
        $supbookIndex = 0x00;
        $ref = pack('vvv', $supbookIndex, $sheet1, $sheet2);

        $index = array_search($ref, $this->references);
        if ($index === false) {
            // if REF was not found add it to references array
            $this->references[] = $ref;
            $index = count($this->references);
        }

        return $index;
    }

    /**
     * Look up the index that corresponds to an external sheet name. The hash of
     * sheet names is updated by the addworksheet() method of the
     * Workbook class.
     *
     * @param string $sheetName
     *
     * @return integer The sheet index, -1 if the sheet was not found
     */
    protected function getSheetIndex($sheetName)
    {
        if (!isset($this->extSheets[$sheetName])) {
            return -1;
        }

        return $this->extSheets[$sheetName];
    }

    /**
     * This method is used to update the array of sheet names. It is
     * called by the addWorksheet() method of the
     * Workbook class.
     *
     * @see Workbook::addWorksheet()
     * @param string $name  The name of the worksheet being added
     * @param integer $index The index of the worksheet being added
     */
    public function addSheet($name, $index)
    {
        $this->extSheets[$name] = $index;
    }

    /**
     * pack() row and column into the required 3 or 4 byte format.
     *
     * @param string $cellAddress The Excel cell reference to be packed
     *
     * @throws \Exception
     * @return array Array containing the row and column in packed() format
     */
    protected function cellToPackedRowcol($cellAddress)
    {
        $cellAddress = strtoupper($cellAddress);
        $cell = Cell::createFromAddress($cellAddress);

        // Set the high bits to indicate if row or col are relative.
        $col = $cell->getCol();
        $col |= (int)$cell->isColRelative() << 14;
        $col |= (int)$cell->isRowRelative() << 15;
        $col = pack('v', $col);

        $row = pack('v', $cell->getRow());

        return array($row, $col);
    }

    /**
     * Advance to the next valid token.
     *
     */
    protected function advance()
    {
        $token = '';

        $position = $this->eatWhitespace();
        $formulaLength = strlen($this->formula);

        while ($position < $formulaLength) {
            $token .= $this->formula[$position];
            if ($position < $formulaLength - 1) {
                $this->lookahead = $this->formula[$position + 1];
            } else {
                $this->lookahead = '';
            }

            if ($this->match($token) != '') {
                $this->currentChar = $position + 1;
                $this->currentToken = $token;
                return;
            }

            if ($position < ($formulaLength - 2)) {
                $this->lookahead = $this->formula[$position + 2];
            } else {
                // if we run out of characters lookahead becomes empty
                $this->lookahead = '';
            }
            $position++;
        }
    }

    /**
     * @return int
     */
    protected function eatWhitespace()
    {
        $position = $this->currentChar;
        $formulaLength = strlen($this->formula);

        // eat up white spaces
        if ($position < $formulaLength) {
            while ($this->formula{$position} == " ") {
                $position++;
            }

            if ($position < ($formulaLength - 1)) {
                $this->lookahead = $this->formula{$position + 1};
            }
        }

        return $position;
    }

    /**
     * Checks if it's a valid token.
     *
     * @param string $token The token to check.
     * @return string The checked token
     */
    protected function match($token)
    {
        if (Token::isDeterministic($token)) {
            return $token;
        }

        if (Token::isLtOrGt($token)) {
            if (!Token::isPossibleLookahead($token, $this->lookahead)) {
                // it's not a GE, LTE or NE token
                return $token;
            }

            return '';
        }

        return $this->processDefaultCase($token);
    }

    /**
     * @param string $token
     *
     * @return string
     */
    protected function processDefaultCase($token)
    {
        if ($this->isReference($token)
            || $this->isExternalReference($token)
            || $this->isAnyRange($token)
            || $this->isNumber($token)
            || Token::isString($token)
            || $this->isFunctionCall($token)
        ) {
            return $token;
        }

        return '';
    }

    /**
     * @return bool
     */
    protected function lookaheadHasNumber()
    {
        return preg_match("/[0-9]/", $this->lookahead) === 1;
    }

    /**
     * @return bool
     */
    protected function isLookaheadDotOrColon()
    {
        return $this->lookahead === '.' || $this->lookahead === ':';
    }

    /**
     * @param string $token
     *
     * @return bool
     */
    protected function isAnyRange($token)
    {
        return Token::isAnyRange($token)
            && !$this->lookaheadHasNumber();
    }

    /**
     * @param string $token
     *
     * @return bool
     */
    protected function isReference($token)
    {
        return Token::isReference($token)
            && !$this->lookaheadHasNumber()
            && !$this->isLookaheadDotOrColon()
            && $this->lookahead != '!';
    }

    /**
     * @param string $token
     *
     * @return bool
     */
    protected function isExternalReference($token)
    {
        return Token::isExternalReference($token)
            && !$this->lookaheadHasNumber()
            && !$this->isLookaheadDotOrColon();
    }

    /**
     * If it's a number (check that it's not a sheet name or range)
     * @param string $token
     *
     * @return bool
     */
    protected function isNumber($token)
    {
        return is_numeric($token)
            && (!is_numeric($token . $this->lookahead) || $this->lookahead == '')
            && $this->lookahead != '!'
            && $this->lookahead != ':';
    }

    /**
     * @param string $token
     *
     * @return bool
     */
    protected function isFunctionCall($token)
    {
        return Token::isFunctionCall($token)
            && $this->lookahead == "(";
    }

    /**
     * The parsing method. It parses a formula.
     *
     * @param string $formula The formula to parse, without the initial equal sign (=).
     */
    public function parse($formula)
    {
        $this->parseTree = array();
        $this->currentChar = 0;
        $this->currentToken = '';
        $this->formula = $formula;
        $this->lookahead = (isset($formula[1])) ? $formula[1] : '';
        $this->advance();
        $this->parseTree = $this->condition();
    }

    /**
     * It parses a condition. It assumes the following rule:
     * Cond -> Expr [(">" | "<") Expr]
     *
     * @return array The parsed ptg'd tree
     */
    protected function condition()
    {
        $result = $this->expression();

        if (Token::isComparison($this->currentToken) || Token::isConcat($this->currentToken)) {
            $ptg = Token::getPtg($this->currentToken);
            $this->advance();
            $result = $this->createTree($ptg, $result, $this->expression());
        }

        return $result;
    }

    /**
     * It parses a expression. It assumes the following rule:
     * Expr -> Term [("+" | "-") Term]
     *      -> "string"
     *      -> "-" Term
     *
     * @return array The parsed ptg'd tree
     */
    protected function expression()
    {
        // If it's a string return a string node
        if (Token::isString($this->currentToken)) {
            $result = $this->createTree($this->currentToken, '', '');
            $this->advance();

            return $result;
        } elseif ($this->currentToken == Token::TOKEN_SUB) {
            // catch "-" Term
            $this->advance();

            return $this->createTree('ptgUminus', $this->expression(), '');
        }

        $result = $this->term();

        while (Token::isAddOrSub($this->currentToken)) {
            $ptg = Token::getPtg($this->currentToken);
            $this->advance();
            $result = $this->createTree($ptg, $result, $this->term());
        }

        return $result;
    }

    /**
     * This function just introduces a ptgParen element in the tree, so that Excel
     * doesn't get confused when working with a parenthesized formula afterwards.
     *
     * @see _fact()
     * @return array The parsed ptg'd tree
     */
    protected function parenthesizedExpression()
    {
        return $this->createTree('ptgParen', $this->expression(), '');
    }

    /**
     * It parses a term. It assumes the following rule:
     * Term -> Fact [("*" | "/") Fact]
     *
     * @return array The parsed ptg'd tree
     */
    protected function term()
    {
        $result = $this->fact();

        while (Token::isMulOrDiv($this->currentToken)) {
            $ptg = Token::getPtg($this->currentToken);
            $this->advance();
            $result = $this->createTree($ptg, $result, $this->fact());
        }

        return $result;
    }

    /**
     * It parses a factor. It assumes the following rule:
     * Fact -> ( Expr )
     *       | CellRef
     *       | CellRange
     *       | Number
     *       | Function
     * @throws \Exception
     * @return array The parsed ptg'd tree
     */
    protected function fact()
    {
        if ($this->currentToken == Token::TOKEN_OPEN) {
            $this->advance(); // eat the "("

            $result = $this->parenthesizedExpression();
            if ($this->currentToken != Token::TOKEN_CLOSE) {
                throw new \Exception("')' token expected.");
            }

            $this->advance(); // eat the ")"

            return $result;
        }

        if (Token::isAnyReference($this->currentToken)) {
            $result = $this->createTree($this->currentToken, '', '');
            $this->advance();

            return $result;
        } elseif (Token::isAnyRange($this->currentToken)) {
            $result = $this->currentToken;
            $this->advance();

            return $result;
        } elseif (is_numeric($this->currentToken)) {
            $result = $this->createTree($this->currentToken, '', '');
            $this->advance();

            return $result;
        } elseif (Token::isFunctionCall($this->currentToken)) {
            $result = $this->func();

            return $result;
        }

        throw new \Exception(
            "Syntax error: " . $this->currentToken .
            ", lookahead: " . $this->lookahead .
            ", current char: " . $this->currentChar
        );
    }

    /**
     * It parses a function call. It assumes the following rule:
     * Func -> ( Expr [,Expr]* )
     * @throws \Exception
     * @return string|array The parsed ptg'd tree
     */
    protected function func()
    {
        $numArgs = 0; // number of arguments received
        $function = strtoupper($this->currentToken);
        $result = ''; // initialize result

        $this->advance();
        $this->advance(); // eat the "("

        while ($this->currentToken != ')') {
            if ($numArgs > 0) {
                if (!Token::isCommaOrSemicolon($this->currentToken)) {
                    throw new \Exception(
                        "Syntax error: comma expected in " .
                        "function $function, arg #{$numArgs}"
                    );
                }

                $this->advance(); // eat the "," or ";"
            } else {
                $result = '';
            }

            $result = $this->createTree('arg', $result, $this->condition());

            $numArgs++;
        }

        $args = Functions::getArgsNumber($function);
        if ($args >= 0 && $args != $numArgs) {
            // If fixed number of args eg. TIME($i,$j,$k). Check that the number of args is valid.
            throw new \Exception("Incorrect number of arguments in function $function() ");
        }

        $result = $this->createTree($function, $result, $numArgs);
        $this->advance(); // eat the ")"

        return $result;
    }

    /**
     * Creates a tree. In fact an array which may have one or two arrays (sub-trees)
     * as elements.
     *
     * @param mixed $value The value of this node.
     * @param mixed $left  The left array (sub-tree) or a final node.
     * @param mixed $right The right array (sub-tree) or a final node.
     * @return array A tree
     */
    protected function createTree($value, $left, $right)
    {
        return array(
            'value' => $value,
            'left' => $left,
            'right' => $right
        );
    }

    /**
     * Builds a string containing the tree in reverse polish notation (What you
     * would use in a HP calculator stack).
     * The following tree:
     *
     *    +
     *   / \
     *  2   3
     *
     * produces: "23+"
     *
     * The following tree:
     *
     *    +
     *   / \
     *  3   *
     *     / \
     *    6   A1
     *
     * produces: "36A1*+"
     *
     * In fact all operands, functions, references, etc... are written as ptg's
     *
     * @param array $tree The optional tree to convert.
     * @return string The tree in reverse polish notation
     */
    protected function toReversePolish($tree)
    {
        if (!is_array($tree)) {
            return $this->convert($tree);
        }

        // if it's a function convert it here (so we can set it's arguments)
        if ($this->isFunction($tree['value'])) {
            return $this->getFunctionPolish($tree);
        }

        $polish = $this->getTreePartPolish($tree['left']);
        $polish .= $this->getTreePartPolish($tree['right']);
        $polish .= $this->convert($tree['value']);

        return $polish;
    }

    /**
     * @param $tree
     *
     * @return string
     */
    protected function getFunctionPolish($tree)
    {
        $polish = '';

        // left subtree for a function is always an array.
        if ($tree['left'] != '') {
            $polish .= $this->toReversePolish($tree['left']);
        }

        $polish .= $this->convertFunction($tree['value'], $tree['right']);

        return $polish;
    }

    /**
     * @param $value
     *
     * @return bool
     */
    protected function isFunction($value)
    {
        return Token::isFunctionCall($value)
            && !Token::isReference($value)
            && !Token::isRangeWithDots($value)
            && !is_numeric($value)
            && !Ptg::exists($value);
    }

    /**
     * @param $part
     *
     * @return string
     * @throws \Exception
     */
    protected function getTreePartPolish($part)
    {
        $polish = '';

        if (is_array($part)) {
            $polish .= $this->toReversePolish($part);
        } elseif ($part != '') {
            // It's a final node
            $polish .= $this->convert($part);
        }

        return $polish;
    }

    /**
     * @return array
     */
    public function getReferences()
    {
        return $this->references;
    }

    /**
     * @param $formula
     *
     * @return string
     */
    public function getReversePolish($formula)
    {
        $this->parse($formula);

        return $this->toReversePolish($this->parseTree);
    }
}