keboola/php-csv

View on GitHub
src/CsvReader.php

Summary

Maintainability
A
0 mins
Test Coverage
A
96%
<?php

declare(strict_types=1);

namespace Keboola\Csv;

use Iterator;
use ReturnTypeWillChange;

class CsvReader extends AbstractCsvFile implements Iterator
{
    /**
     * @deprecated use Keboola\Csv\CsvOptions::DEFAULT_ENCLOSURE
     */
    const DEFAULT_ESCAPED_BY = CsvOptions::DEFAULT_ESCAPED_BY;
    const SAMPLE_SIZE = 10000;

    /**
     * @var int
     */
    private $skipLines;

    /**
     * @var int
     */
    private $rowCounter = 0;

    /**
     * @var array|null|false
     */
    private $currentRow;

    /**
     * @var array
     */
    private $header;

    /**
     * @var string
     */
    private $lineBreak;

    /**
     * CsvFile constructor.
     * @param string|resource $file
     * @param string $delimiter
     * @param string $enclosure
     * @param string $escapedBy
     * @param int $skipLines
     * @throws Exception
     */
    public function __construct(
        $file,
        $delimiter = CsvOptions::DEFAULT_DELIMITER,
        $enclosure = CsvOptions::DEFAULT_ENCLOSURE,
        $escapedBy = CsvOptions::DEFAULT_ESCAPED_BY,
        $skipLines = 0
    ) {
        $this->options = new CsvOptions($delimiter, $enclosure, $escapedBy);
        $this->setSkipLines($skipLines);
        $this->setFile($file);

        $this->lineBreak = $this->detectLineBreak();
        $this->validateLineBreak();

        rewind($this->filePointer);
        $header = UTF8BOMHelper::detectAndRemoveBOM($this->readLine());
        if (is_array($header) && $header[0] === null) {
            $header = [];
        }
        $this->header = $header;
        $this->rewind();
    }

    /**
     * @param integer $skipLines
     * @return CsvReader
     * @throws InvalidArgumentException
     */
    protected function setSkipLines($skipLines)
    {
        $this->validateSkipLines($skipLines);
        $this->skipLines = $skipLines;
        return $this;
    }

    /**
     * @param integer $skipLines
     * @throws InvalidArgumentException
     */
    protected function validateSkipLines($skipLines)
    {
        if (!is_int($skipLines) || $skipLines < 0) {
            throw new InvalidArgumentException(
                "Number of lines to skip must be a positive integer. \"$skipLines\" received.",
                Exception::INVALID_PARAM,
            );
        }
    }

    /**
     * @param $fileName
     * @throws Exception
     */
    protected function openCsvFile($fileName)
    {
        if (!is_file($fileName)) {
            throw new Exception(
                'Cannot open file ' . $fileName,
                Exception::FILE_NOT_EXISTS,
            );
        }
        $this->filePointer = @fopen($fileName, 'r');
        if (!$this->filePointer) {
            throw new Exception(
                "Cannot open file {$fileName} " . error_get_last()['message'],
                Exception::FILE_NOT_EXISTS,
            );
        }
    }

    /**
     * @return string
     */
    protected function detectLineBreak()
    {
        @rewind($this->getFilePointer());
        $sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE);
        if (substr((string) $sample, -1) === "\r") {
            // we might have hit the file in the middle of CR+LF, only getting CR
            @rewind($this->getFilePointer());
            $sample = @fread($this->getFilePointer(), self::SAMPLE_SIZE+1);
        }

        return LineBreaksHelper::detectLineBreaks($sample, $this->getEnclosure(), $this->getEscapedBy());
    }

    /**
     * @return array|false|null
     * @throws Exception
     * @throws InvalidArgumentException
     */
    protected function readLine()
    {
        // allow empty enclosure hack
        $enclosure = !$this->getEnclosure() ? chr(0) : $this->getEnclosure();
        $escapedBy = !$this->getEscapedBy() ? chr(0) : $this->getEscapedBy();
        return @fgetcsv($this->getFilePointer(), null, $this->getDelimiter(), $enclosure, $escapedBy);
    }

    /**
     * @return string
     * @throws InvalidArgumentException
     */
    protected function validateLineBreak()
    {
        $lineBreak = $this->getLineBreak();
        if (in_array($lineBreak, ["\r\n", "\n"])) {
            return $lineBreak;
        }

        throw new InvalidArgumentException(
            "Invalid line break. Please use unix \\n or win \\r\\n line breaks.",
            Exception::INVALID_PARAM,
        );
    }

    /**
     * @return string
     */
    public function getLineBreak()
    {
        return $this->lineBreak;
    }

    /**
     * @inheritdoc
     */
    #[ReturnTypeWillChange]
    public function rewind()
    {
        rewind($this->getFilePointer());
        for ($i = 0; $i < $this->skipLines; $i++) {
            $this->readLine();
        }
        $this->currentRow = $this->readLine();
        $this->rowCounter = 0;
    }

    /**
     * @return string
     */
    public function getEscapedBy()
    {
        return $this->options->getEscapedBy();
    }

    /**
     * @return int
     */
    public function getColumnsCount()
    {
        return count($this->getHeader());
    }

    /**
     * @return array
     */
    public function getHeader()
    {
        if ($this->header) {
            return $this->header;
        }
        return [];
    }

    /**
     * @return string
     */
    public function getLineBreakAsText()
    {
        return trim(json_encode($this->getLineBreak()), '"');
    }

    /**
     * @inheritdoc
     */
    #[ReturnTypeWillChange]
    public function current()
    {
        return $this->currentRow;
    }

    /**
     * @inheritdoc
     */
    #[ReturnTypeWillChange]
    public function next()
    {
        $this->currentRow = $this->readLine();
        $this->rowCounter++;
    }

    /**
     * @inheritdoc
     */
    #[ReturnTypeWillChange]
    public function key()
    {
        return $this->rowCounter;
    }

    /**
     * @inheritdoc
     */
    #[ReturnTypeWillChange]
    public function valid()
    {
        return $this->currentRow !== false;
    }
}