mgribov/apiscrape

View on GitHub
src/Scrape/Storage/HttpStorage.php

Summary

Maintainability
A
2 hrs
Test Coverage
<?php

namespace Scrape\Storage;

/**
 * Manages a list of saved paths, their responses and cache/etag data.
 *
 * The instance keeps the most recently loaded or saved record in $object.
 * getCache(), getEtag(), getResponse() and isCurrent() all read from that
 * record, so they describe whichever path was last handled by get(),
 * bumpCache() or save().
 */
class HttpStorage implements HttpStorageInterface {

    /**
     * When true, __debug() dumps its argument
     * @var bool
     */
    protected $debug = false;

    /**
     * Persistence backend; must be injected with setBackend() before use
     * @var \Scrape\Storage\Backend\BackendInterface
     */
    protected $backend;

    /**
     * if no Cache-Control received, and no other value configured, cache for 1 day by default
     * received Cache-Control max-age will overwrite this
     * a value of 0 disables caching altogether
     * @var int
     */
    protected $cacheTime = 86400;

    /**
     * Object format
     * path: the key the record is stored under
     * cache: unix timestamp until which the record is considered fresh
     * etag: etag value with double quotes stripped, or null
     * response: the saved response
     * @var array
     */
    protected $object = array(
        'path' => null,
        'cache' => null,
        'etag' => null,
        'response' => null,
        );

    /**
     * Turn debug output on or off
     *
     * @param bool $v
     */
    public function setDebug($v) {
        $this->debug = $v;
    }

    /**
     * Set the default lifetime used when a response carries no max-age
     *
     * @param int $sec lifetime in seconds, 0 disables caching
     */
    public function setCacheTime($sec) {
        $this->cacheTime = $sec;
    }

    /**
     * @return int default lifetime in seconds
     */
    public function getCacheTime() {
        return $this->cacheTime;
    }

    /**
     *
     * @param \Scrape\Storage\Backend\BackendInterface $b
     */
    public function setBackend(\Scrape\Storage\Backend\BackendInterface $b) {
        $this->backend = $b;
    }

    /**
     * Expiry timestamp of the current record
     *
     * @return mixed the stored 'cache' value, or false when the current record has no response
     */
    public function getCache() {
        if (isset($this->object['response'])) {
            return isset($this->object['cache']) ? $this->object['cache'] : null;
        }

        return false;
    }


    /**
     * Etag of the current record
     *
     * @return mixed the stored 'etag' value, or false when there is no usable record
     */
    public function getEtag() {
        if ($this->hasResponse() && is_array($this->object) && array_key_exists('etag', $this->object)) {
            return $this->object['etag'];
        }

        return false;
    }

    /**
     * Response of the current record
     *
     * @return array|null null when no record is loaded
     */
    public function getResponse() {
        return isset($this->object['response']) ? $this->object['response'] : null;
    }

    /**
     * Whether the current record exists and has not expired yet
     *
     * @return bool always false when caching is disabled
     */
    public function isCurrent() {
        if ($this->cacheDisabled()) {
            return false;
        }

        return $this->hasResponse()
            && isset($this->object['cache'])
            && $this->object['cache'] > time();
    }

    /**
     * Delete the record stored under a path
     *
     * @param string $path
     * @return mixed whatever the backend's delete() returns
     */
    public function remove($path) {
        return $this->backend->delete($path);
    }

    /**
     * Load the record for a path and make it the current record
     *
     * When caching is disabled the stored copy is deleted and an all-null
     * record is returned; the current record is reset as well, so the
     * getters cannot report data left over from an earlier path.
     *
     * @param string $path
     * @return array the backend's value for $path, returned exactly as the backend gave it
     */
    public function get($path) {
        if ($this->cacheDisabled()) {
            $this->backend->delete($path);
            $this->object = $this->emptyObject();

            return $this->object;
        }

        $stored = $this->backend->get($path);

        // NOTE(review): the backend contract for a miss is not visible here;
        // null/false are treated as "nothing stored" so later reads of
        // $this->object do not index into a non-array value.
        $this->object = ($stored === null || $stored === false) ? $this->emptyObject() : $stored;

        return $stored;
    }

    /**
     * Just bump up cache timer, used when handling 304's
     *
     * @param string $path
     * @return bool true when a stored response was found and re-saved
     */
    public function bumpCache($path) {
        $this->get($path);

        if (!$this->hasResponse()) {
            return false;
        }

        // @todo whats a good default?
        $this->object['cache'] = time() + 3600;
        $this->backend->delete($path);
        $this->backend->put($this->object);

        return true;
    }

    /**
     * Create or replace a current object by its path
     *
     * The record is built from scratch on every call, so an etag belonging
     * to a previously loaded record is never stored with the new response.
     *
     * @param string $path
     * @param array $response
     * @param string $header raw response header block, one header per line
     * @return bool false when the response is empty/invalid or caching is disabled
     */
    public function save($path, $response, $header) {
        if (!(is_array($response) && count($response) > 0)) {
            $this->__debug("no valid response for $path, will not save, invalidating any saved copy");
            $this->backend->delete($path);
            return false;
        }

        if ($this->cacheDisabled()) {
            return false;
        }

        $headers = $this->parseHeaders($header);

        $record = $this->emptyObject();
        $record['path'] = $path;
        $record['response'] = $response;

        // get max-age
        $maxAge = array();
        if (isset($headers['cache-control'])) {
            preg_match('/max-age=(\d+)/i', $headers['cache-control'], $maxAge);
        }

        if (count($maxAge)) {
            $this->__debug("got cache-control for $path, cache is valid for {$maxAge[1]} seconds");
            $record['cache'] = (int)(time() + $maxAge[1]);

        } else {
            $this->__debug("no cache-control for $path, caching for {$this->cacheTime} seconds");
            $record['cache'] = (int)(time() + $this->cacheTime);
        }

        // get etag, stored without its surrounding double quotes
        $etag = isset($headers['etag']) ? str_replace('"', '', $headers['etag']) : '';
        if ($etag !== '') {
            $this->__debug("new etag for $path");
            $record['etag'] = $etag;

        } else {
            $this->__debug("no etag for $path");
        }

        $this->object = $record;
        $this->backend->delete($path);
        $this->backend->put($this->object);

        return true;
    }

    /**
     * Dump a value when debugging is enabled
     *
     * @param mixed $data
     */
    public function __debug($data) {
        if ($this->debug) {
            var_dump($data);
        }
    }

    /**
     * A record with every field unset
     *
     * @return array
     */
    private function emptyObject() {
        return array(
            'path' => null,
            'cache' => null,
            'etag' => null,
            'response' => null,
        );
    }

    /**
     * Whether caching is switched off
     *
     * The comparison is deliberately loose so that values such as "0" or
     * null passed to setCacheTime() keep disabling the cache.
     *
     * @return bool
     */
    private function cacheDisabled() {
        return $this->cacheTime == 0;
    }

    /**
     * Whether the current record holds a non-empty array response
     *
     * @return bool
     */
    private function hasResponse() {
        return isset($this->object['response'])
            && is_array($this->object['response'])
            && count($this->object['response']) > 0;
    }

    /**
     * Turn a raw header block into a name => value map
     *
     * Names are lower-cased because header field names are case-insensitive
     * ("ETag", "Etag" and "etag" are the same header). Each line is split on
     * the first colon only, so values that contain colons are kept. Lines
     * without a colon, such as the status line, are skipped. When a header
     * repeats, the last occurrence wins.
     *
     * @param string $header
     * @return array
     */
    private function parseHeaders($header) {
        $parsed = array();

        foreach (preg_split('/\r\n|\n|\r/', (string)$header) as $line) {
            $parts = explode(':', $line, 2);
            if (count($parts) !== 2) {
                continue;
            }

            $name = strtolower(trim($parts[0]));
            if ($name !== '') {
                $parsed[$name] = trim($parts[1]);
            }
        }

        return $parsed;
    }
}