CORE-POS/IS4C

View on GitHub
fannie/classlib2.0/data/FileData.php

Summary

Maintainability
C
1 day
Test Coverage
F
16%
<?php
/*******************************************************************************

    Copyright 2015 Whole Foods Co-op

    This file is part of CORE-POS.

    IT CORE is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    IT CORE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    in the file license.txt along with IT CORE; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*********************************************************************************/

namespace COREPOS\Fannie\API\data;

/**
  @class FileData

  Helper functions for extracting data
  from files
*/
class FileData
{
    /**
      Get two-dimensional array of file data
      @param $filename [string] file name with full path
      @param $limit if specified only return $limit records
      @return An array of arrays. Each inner array
        represents one line of data
    */
    public static function fileToArray($filename, $limit=0) 
    {
        if (!file_exists($filename)) {
            return array();
        }

        switch (substr(basename($filename), -3)) {
            case 'csv':
                return self::csvToArray($filename, $limit);
            case 'xls':
                return self::xlsToArray($filename, $limit);
            case 'lsx':
                // php tempfile nameing only allows a three character prefix
                return self::xlsxToArray($filename, $limit);
            case 'pdf':
                return self::pdfToArray($filename, $limit);
            case 'txt':
                return self::txtToArray($filename, $limit);
        }

        return array();
    }

    /**
      Helper for csv files. See fileToArray()
    */
    public static function csvToArray($filename, $limit=0)
    {
        $fptr = fopen($filename,'r');
        if (!$fptr) {
            return array();
        }
        $ret = array();
        while (!feof($fptr)) {
            $ret[] = fgetcsv($fptr);
            if ($limit > 0 && count($ret) >= $limit) {
                break;
            }
        }
        fclose($fptr);

        return $ret;
    }

    /**
      Helper for xls files. See fileToArray()
    */
    public static function xlsToArray($filename, $limit)
    {
        /** 
          PhpOffice can read both file variants just fine if it's
          available.
        */
        if (!class_exists('\\PhpOffice\\PhpSpreadsheet\\IOFactory') || class_exists('\\PHPExcel_IOFactory')) {
            return self::xlsxToArray($filename, $limit);
        }

        if (!class_exists('Spreadsheet_Excel_Reader')) {
            include_once(dirname(__FILE__).'/../../src/Excel/xls_read/reader.php');
        }

        $data = new \Spreadsheet_Excel_Reader();
        $data->setOutputEncoding('ISO-8859-1');
        $data->read($filename);

        $sheet = $data->sheets[0];
        $rows = $sheet['numRows'];
        $cols = $sheet['numCols'];
        $ret = array();
        for($i=1; $i<=$rows; $i++) {
            $line = array();
            for ($j=1; $j<=$cols; $j++) {
                if (isset($sheet['cells'][$i]) && isset($sheet['cells'][$i][$j])) {
                    $line[] = $sheet['cells'][$i][$j];
                } else {
                    $line[] = '';
                }
            }
            $ret[] = $line;
            if ($limit != 0 && count($ret) >= $limit) {
                break;
            }
        }

        return $ret;
    }

    /**
      Helper for xlsx files. See fileToArray()
    */
    public static function xlsxToArray($filename, $limit=0)
    {
        if (!class_exists('\\PhpOffice\\PhpSpreadsheet\\IOFactory') && !class_exists('\\PHPExcel_IOFactory')) {
            return false;
        }

        if (class_exists('\\PhpOffice\\PhpSpreadsheet\\IOFactory')) {
            $objPHPExcel = \PhpOffice\PhpSpreadsheet\IOFactory::load($filename);
            $firstCol = 1;
        } else {
            $objPHPExcel = \PHPExcel_IOFactory::load($filename);
            $firstCol = 0;
        }
        $sheet = $objPHPExcel->getActiveSheet();
        $rows = $sheet->getHighestRow();
        if (class_exists('\\PhpOffice\\PhpSpreadsheet\\IOFactory')) {
            $cols = \PhpOffice\PhpSpreadsheet\Cell\Coordinate::columnIndexFromString($sheet->getHighestColumn());
        } else {
            $cols = \PHPExcel_Cell::columnIndexFromString($sheet->getHighestColumn());
        }
        $ret = array();
        for ($i=1; $i<=$rows; $i++) {
            $new = array_map(function ($j) use ($i, &$sheet, $firstCol) {
                return $sheet->getCellByColumnAndRow($j, $i)->getValue();
            }, range($firstCol, $cols));
            $ret[] = $new;
            if ($limit > 0 && count($ret) >= $limit) {
                break;
            }
        }

        return $ret;
    }

    public static function excelFloatToDate($float)
    {
        $days = floor($float);
        $time = 24.0 * ($float - $days);
        $hour = floor($time); 
        $remainder = 60.0 * ($time - $hour);
        $minutes = floor($remainder);
        $remainder = 60.0 * ($remainder - $minutes);
        $seconds = floor($remainder);

        return date('Y-m-d H:i:s', mktime($hour, $minutes, $seconds, 1, $days-1, 1900));
    }

    /**
      Reduce potential for CSV based exploits

      Different spreadsheet software *may* interpret values in CSVs/TSVs
      that begin with =, @, +, or - as forumals and cause the spreadsheet to
      execute the cell's value. This may or may not include user-facing 
      warning messages.

      One common solution is to prefix such fields with single quote ('). I'm
      not using that option since it creates different headaches for users trying
      to use the CSV/TSV as a data interchange format rather than just look at
      it in Excel. Instead:

      1. Leading =, @, or + characters are simply removed. This should include multiples,
         e.g. "=@+=1+1" becomes "1+1". This creates a small set of strings that cannot
         be used as product names, brands, etc but should be an OK compromise.
      2. Values with a leading - do need to be allowed. This are validated as either
         negative integers (-123) or negative floats (-123.45).
    */
    public static function excelNoFormula($str)
    {
        $str = trim($str);
        $first = substr(trim($str), 0, 1);
        while ($first == '=' || $first == '@' || $first == '+' || $first == '-') {
            switch ($first) {
                case '-':
                    if (preg_match('/^-[0-9]+\.[0-9]+$/', $str) || preg_match('/^-[0-9]+$/', $str)) {
                        return $str;
                    }
                    return 'badval';
                    break;

                case '=':
                case '@':
                case '+':
                default:
                    $str = substr($str, 1);
                    break;
            }
            $first = substr(trim($str), 0, 1);
        }

        return $str;
    }

    public static function pdfToArray($filename, $limit)
    {
        if (!class_exists('\\Smalot\\PdfParser\\Parser')) {
            return false;
        }

        $parser = new \Smalot\PdfParser\Parser();
        $pdf = $parser->parseFile($filename);
        $lines = explode("\n", $pdf->getText());

        return $limit ? array_slice($lines, 0, $limit) : $lines;
    }

    public static function txtToArray($filename, $limit=0)
    {
        $fptr = fopen($filename,'r');
        if (!$fptr) {
            return array();
        }
        $ret = array();
        while (!feof($fptr)) {
            $ret[] = fgetcsv($fptr, 0, "\t");
            if ($limit > 0 && count($ret) >= $limit) {
                break;
            }
        }
        fclose($fptr);

        return $ret;
    }
}