plugins/site_map/modules/yf_site_map.class.php

Summary

Maintainability
F
3 days
Test Coverage
<?php

/**
 * Site Map Generator (implementation of the http://www.sitemaps.org/protocol.php).
 *
 * @author        YFix Team <yfix.dev@gmail.com>
 * @version        1.0
 */
class yf_site_map
{
    /** @var bool Enable\disable site map generation */
    public $SITE_MAP_ENABLED = false;
    /** @var array Array of site modules to include their parts of sitemap */
    public $MODULES_TO_INCLUDE = [];
    /** @var string Sitemap store folder */
    public $SITEMAP_STORE_FOLDER = 'site_map/';
    /** @var string Sitemap file name */
    public $SITEMAP_FILE_NAME = 'site_map';
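    /** @var array Names of the module methods probed, in this order, as sitemap hooks (the first one that exists is called) @conf_skip */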
    public $HOOK_NAMES = ['_hook_sitemap', '_hook_site_map', '_site_map_items'];
    /** @var mixed */
    public $DEFAULT_LAST_UPDATE = '';
    /** @var mixed */
    public $DEFAULT_PRIORITY = '';
    /** @var mixed */
    public $DEFAULT_CHANGEFREQ = 'daily';
    /** @var bool Notify Google */
    public $NOTIFY_GOOGLE = false;
    /** @var int Max entries for sitemap file */
    public $MAX_ENTRIES = 50000;
    /** @var int Max sitemap filesize */
    public $MAX_SIZE = 10000000;
    /** @var int Limit max URL length to the 2048 symbols */
    public $MAX_URL_LENGTH = 2048;
    /** @var int Max number of sitemaps */
    public $MAX_SITEMAPS = 1000;
    /** @var bool Do not change! Internal flag that becomes 'true' once the number of sitemap files reaches $MAX_SITEMAPS. No further entries are stored after that */
    public $LIMIT_REACHED = false;
    /** @var array Frequency avail values @conf_skip */
    public $CHANGEFREQ_VALUES = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'];
    /** @var string @conf_skip */
    public $_notify_url = '';
    /** @var string @conf_skip */
    public $_file_name = '';
    /** @var string @conf_skip */
    public $_file_extension = '.xml';
    /** @var bool Use locking */
    public $USE_LOCKING = false;
    /** @var int Lock timeout */
    public $LOCK_TIMEOUT = 600;
    /** @var string Lock file name */
    public $LOCK_FILE_NAME = 'site_map.lock';
    /** @var int Site map TTL */
    public $SITEMAP_LIVE_TIME = 43200;    // 60*60*12 = 12hours
    /** @var bool Allow rewrite */
    public $ALLOW_REWRITE = false;
    /** @var int Rewrite split length */
    public $REWRITE_SPLIT_LENGTH = 200000;
    /** @var bool */
    public $DIRECT_FILE_OUTPUT = true;
    /** @var bool */
    public $TEST_MODE = false;
    /** @var string Secret value show() expects in the 'id' URL param; an empty value disables the check */
    public $SECURITY_URL_PARAM = '';

    /**
     * Fallback for calls to methods that are not defined on this class.
     *
     * Raises an E_USER_WARNING naming the missing method and reports failure
     * to the caller.
     *
     * @param string $name      Name of the method that was called
     * @param array  $arguments Arguments passed to the call (not used)
     * @return bool Always false
     */
    public function __call($name, $arguments)
    {
        $message = __CLASS__ . ': No method ' . $name;
        trigger_error($message, E_USER_WARNING);

        return false;
    }

    /**
     * YF module constructor.
     *
     * Resolves the lock file, web and filesystem paths, pre-computes the size
     * of the escaped web path, keeps 10% head-room in MAX_SIZE, builds the
     * list of modules to ask for sitemap entries (de-duplicated and limited
     * to modules marked active in the "user_modules" table) and adds the
     * current language to the sitemap file extension.
     */
    public function _init()
    {
        if ($this->USE_LOCKING) {
            $this->LOCK_FILE_NAME = STORAGE_PATH . $this->LOCK_FILE_NAME;
        }
        // The same relative folder is exposed both as a URL and as a filesystem path
        $uploads_subdir = 'uploads/' . $this->SITEMAP_STORE_FOLDER;
        $this->SITEMAP_WEB_PATH = WEB_PATH . $uploads_subdir;
        $this->SITEMAP_STORE_FOLDER = PROJECT_PATH . $uploads_subdir;

        // Size of the string (web path) that is added to an url when a file is processed
        // NOTE(review): utf8_encode() is deprecated as of PHP 8.2 - confirm the supported PHP versions
        $escaped_web_path = htmlspecialchars(utf8_encode(WEB_PATH));
        $this->_path_size_bytes = strlen($escaped_web_path);
        // Leave some space in file size
        $this->MAX_SIZE *= 0.9;

        $this->_notify_url = url('/@object');

        if (empty($this->MODULES_TO_INCLUDE)) {
            $this->MODULES_TO_INCLUDE = $this->_get_modules_from_files();
        }
        $active_modules = db()->select('name', 'name AS n2')->from('user_modules')->where('active = 1')->get_2d();

        // Key the list by module name (which removes duplicates) and keep active modules only
        $modules = [];
        foreach ((array) $this->MODULES_TO_INCLUDE as $module_name) {
            if (isset($active_modules[$module_name])) {
                $modules[$module_name] = $module_name;
            }
        }
        $this->MODULES_TO_INCLUDE = $modules;

        // Link rewriting only makes sense when the template engine has it switched on
        if ($this->ALLOW_REWRITE) {
            if ( ! tpl()->REWRITE_MODE) {
                $this->ALLOW_REWRITE = false;
            }
        }
        $this->_file_extension = sprintf('_%s%s', conf('language'), $this->_file_extension);
    }

    /**
     * Default method: serves (and, when needed, regenerates) the sitemap.
     *
     * When SECURITY_URL_PARAM is set, the request must carry exactly that
     * value in the 'id' URL param; otherwise 'Wrong url' is printed and the
     * sitemap is not generated.
     *
     * @return mixed Result of print (int 1) on a wrong url, otherwise the
     *               result of _generate_sitemap()
     */
    public function show()
    {
        if ($this->SECURITY_URL_PARAM) {
            // A missing or non-scalar (e.g. array) param can never match the secret
            $given = (isset($_GET['id']) && is_scalar($_GET['id'])) ? (string) $_GET['id'] : '';
            // Exact, timing-safe comparison: a loose "!=" treats numeric-looking
            // strings such as '1e3' and '1000' as equal
            if ( ! hash_equals((string) $this->SECURITY_URL_PARAM, $given)) {
                main()->NO_GRAPHICS = true;
                return print _e('Wrong url');
            }
        }
        return $this->_generate_sitemap();
    }

    /**
     * Sitemap generator (format described at http://www.sitemaps.org/protocol.php).
     *
     * Steps:
     *  1. While the stored index file (or, when there is no index, the first
     *     sitemap file) is younger than SITEMAP_LIVE_TIME, it is served as is.
     *  2. Otherwise old files are removed and every module from
     *     MODULES_TO_INCLUDE that has one of the HOOK_NAMES methods is asked
     *     for entries. Entries are stored through _add(), which starts a new
     *     numbered file when MAX_ENTRIES / MAX_SIZE is reached.
     *  3. Every generated file is post-processed by _process_sitemap_file().
     *  4. When more than one file was generated, a sitemap index is written.
     *  5. Google is notified (when enabled) and the result is served.
     *
     * @return false|null False when the generator is disabled, a fresh cached
     *                    file was served or another run holds the lock; null
     *                    otherwise. With DIRECT_FILE_OUTPUT enabled
     *                    _redirect_sitemap() terminates the script itself.
     */
    public function _generate_sitemap()
    {
        main()->NO_GRAPHICS = true;
        if ( ! $this->SITE_MAP_ENABLED) {
            return false;
        }
        // !! Important !!
        // Turn off rewrite adding url params into sitemap urls
        _class('rewrite')->URL_ADD_BUILTIN_PARAMS = false;

        $_sitemap_base_name = $this->SITEMAP_FILE_NAME;
        // Check if needed to recreate sitemap files
        $_path = $this->SITEMAP_STORE_FOLDER . $this->SITEMAP_FILE_NAME . '_index' . $this->_file_extension;
        if (file_exists($_path) && (filemtime($_path) + $this->SITEMAP_LIVE_TIME) > time() && ! $this->TEST_MODE) {
            // send headers
            $this->_redirect_sitemap($this->SITEMAP_WEB_PATH . $this->SITEMAP_FILE_NAME . '_index' . $this->_file_extension);
            return false;
        }
        if ( ! file_exists($_path)) {
            $_path = $this->SITEMAP_STORE_FOLDER . $this->SITEMAP_FILE_NAME . '1' . $this->_file_extension;
            if (file_exists($_path) && (filemtime($_path) + $this->SITEMAP_LIVE_TIME) > time() && ! $this->TEST_MODE) {
                // send headers
                $this->_redirect_sitemap($this->SITEMAP_WEB_PATH . $this->SITEMAP_FILE_NAME . '1' . $this->_file_extension);
                return false;
            }
        }
        // Start generate sitemap
        @ignore_user_abort(true);
        @set_time_limit($this->LOCK_TIMEOUT);

        if ($this->USE_LOCKING) {
            clearstatcache();
            if (file_exists($this->LOCK_FILE_NAME)) {
                // A lock older than LOCK_TIMEOUT is considered stale and is removed
                if ((time() - filemtime($this->LOCK_FILE_NAME)) > $this->LOCK_TIMEOUT) {
                    unlink($this->LOCK_FILE_NAME);
                } else {
                    return false;
                }
            }
            file_put_contents($this->LOCK_FILE_NAME, time());
        }
        $this->_sitemap_file_counter = 1;
        _mkdir_m($this->SITEMAP_STORE_FOLDER);
        _class('dir')->delete_files($this->SITEMAP_STORE_FOLDER, '/' . $_sitemap_base_name . '.*/i');

        $this->_fp = fopen($this->SITEMAP_STORE_FOLDER . $this->SITEMAP_FILE_NAME . $this->_sitemap_file_counter . $this->_file_extension, 'w+');
        $this->_total_length = 0;
        $this->_entries_counter = 0;

        // Keep the header text so that its size really counts towards the file size
        $header_text = $this->_tpl_sitemap_header();
        $this->_output($header_text);
        $this->_total_length = strlen($header_text);

        foreach ((array) $this->MODULES_TO_INCLUDE as $module_name) {
            $module_obj = module_safe($module_name);
            if ($this->LIMIT_REACHED || ! is_object($module_obj)) {
                continue;
            }
            // The first hook method that exists on the module wins
            $hook_name = '';
            foreach ((array) $this->HOOK_NAMES as $_hook_name) {
                if (method_exists($module_obj, $_hook_name)) {
                    $hook_name = $_hook_name;
                    break;
                }
            }
            if ($hook_name) {
                // A hook may call _add() on the passed object itself and/or return an array of items
                $items = $module_obj->$hook_name($this);
                if (is_array($items)) {
                    foreach ($items as $item) {
                        $this->_add($item);
                    }
                }
            }
        }
        // When the limit was reached the last file has already been finished by _store_item()
        if ( ! $this->LIMIT_REACHED) {
            $this->_output($this->_tpl_sitemap_footer());
            if (is_resource($this->_fp)) {
                fclose($this->_fp);
            }
        }

        // Collect the numbered sitemap files of the current run. The pattern is
        // built from the real file extension (it contains the language, e.g.
        // "_en.xml") and allows any number of digits, so every generated file
        // is found and the index file itself is not.
        $_sitemap_filename_pattern = '/^' . preg_quote($this->SITEMAP_FILE_NAME, '/') . '[0-9]+' . preg_quote($this->_file_extension, '/') . '$/';
        $sitemaps = [];
        foreach ((array) _class('dir')->scan($this->SITEMAP_STORE_FOLDER) as $file_name) {
            if (false !== strpos($file_name, '.svn') || false !== strpos($file_name, '.git')) {
                continue;
            }
            if ( ! preg_match($_sitemap_filename_pattern, basename($file_name))) {
                continue;
            }
            $sitemaps[$file_name] = $file_name;
        }
        foreach ($sitemaps as $filename) {
            $this->_process_sitemap_file($filename);
        }
        $this->_file_for_google = str_replace(PROJECT_PATH, WEB_PATH, $this->SITEMAP_STORE_FOLDER) . $this->SITEMAP_FILE_NAME . '1' . $this->_file_extension;
        if (count($sitemaps) > 1) {
            // Create sitemap index: open the file first, then write header, entries and footer into it
            $this->_fp = fopen($this->SITEMAP_STORE_FOLDER . $this->SITEMAP_FILE_NAME . '_index' . $this->_file_extension, 'w+');
            $this->_output($this->_tpl_sitemap_index_header());
            foreach ($sitemaps as $sitemap_file) {
                $string = "\t<sitemap>\n";
                $string .= "\t\t<loc>" . str_replace(PROJECT_PATH, WEB_PATH, $sitemap_file) . "</loc>\n";
                $last_mod = $this->_iso8601_date(filemtime($sitemap_file));
                if ($last_mod) {
                    $string .= "\t\t<lastmod>" . $last_mod . "</lastmod>\n";
                }
                $string .= "\t</sitemap>\n";
                // Store entry data to file
                $this->_output($string);
            }
            $this->_output($this->_tpl_sitemap_index_footer());
            if (is_resource($this->_fp)) {
                fclose($this->_fp);
            }

            $this->_file_for_google = str_replace(PROJECT_PATH, WEB_PATH, $this->SITEMAP_STORE_FOLDER) . $this->SITEMAP_FILE_NAME . '_index' . $this->_file_extension;
        }
        // Release lock
        if ($this->USE_LOCKING) {
            unlink($this->LOCK_FILE_NAME);
        }
        // Notify before serving: with DIRECT_FILE_OUTPUT _redirect_sitemap() calls exit()
        $this->_do_notify_google();

        $this->_redirect_sitemap($this->_file_for_google);
    }

    /**
     * Short alias of _store_item(), meant to be called from module hooks.
     *
     * @param mixed $data Entry url (string) or entry data (array with the 'url' key)
     * @return mixed Whatever _store_item() returns for the given data
     */
    public function _add($data = [])
    {
        $result = $this->_store_item($data);

        return $result;
    }

    /**
     * Store one sitemap entry in the currently opened sitemap file.
     *
     * Missing 'last_update', 'priority' and 'changefreq' values are taken from
     * the DEFAULT_* properties; a callable default is called with the entry
     * data and its result is used. When adding the entry reaches MAX_SIZE or
     * MAX_ENTRIES, the current file is finished and the entry goes into the
     * next numbered file. When MAX_SITEMAPS files already exist, LIMIT_REACHED
     * is raised and the entry is dropped.
     *
     * @param string|array $data Entry url, or array with the keys 'url'
     *                           (required), 'last_update' (timestamp),
     *                           'priority' and 'changefreq' (one of
     *                           CHANGEFREQ_VALUES)
     * @return false|null False when the entry was not stored, null otherwise
     */
    public function _store_item($data = [])
    {
        if (is_string($data) && strlen($data)) {
            $data = ['url' => $data];
        }
        if (empty($data) || empty($data['url'])) {
            return false;
        }
        if ($this->LIMIT_REACHED) {
            return false;
        }
        // Shortcut for calling url('/some_module')
        if (substr($data['url'], 0, 1) === '/' && substr($data['url'], 0, 2) !== '//') {
            $data['url'] = url($data['url']);
        }
        $location = $data['url'];
        if ((strlen($location) + $this->_path_size_bytes) >= $this->MAX_URL_LENGTH) {
            return false;
        }
        // Fill the optional fields from the defaults. empty() is used because
        // the keys are optional and reading a missing key raises a warning.
        $defaults = [
            'last_update' => $this->DEFAULT_LAST_UPDATE,
            'priority' => $this->DEFAULT_PRIORITY,
            'changefreq' => $this->DEFAULT_CHANGEFREQ,
        ];
        foreach ($defaults as $field => $default) {
            if ( ! empty($data[$field]) || ! $default) {
                continue;
            }
            $data[$field] = $default;
            if (is_callable($default)) {
                $data[$field] = $default($data);
            }
        }

        $string = "\t<url>\n";
        $string .= "\t\t<loc>" . $location . "</loc>\n";
        if ( ! empty($data['last_update'])) {
            $last_mod = $this->_iso8601_date($data['last_update']);
            if ($last_mod) {
                $string .= "\t\t<lastmod>" . $last_mod . "</lastmod>\n";
            }
        }
        if ( ! empty($data['priority'])) {
            $string .= "\t\t<priority>" . (float) ($data['priority']) . "</priority>\n";
        }
        if ( ! empty($data['changefreq']) && in_array($data['changefreq'], $this->CHANGEFREQ_VALUES, true)) {
            $string .= "\t\t<changefreq>" . $data['changefreq'] . "</changefreq>\n";
        }
        $string .= "\t</url>\n";

        // The web path may be added to the url when the file is post-processed,
        // so its size is counted together with the size of the entry itself
        $entry_size = strlen($string) + $this->_path_size_bytes;
        $this->_total_length += $entry_size;
        $this->_entries_counter++;
        // Check total length and number of entries before save
        if ($this->_total_length >= $this->MAX_SIZE || $this->_entries_counter >= $this->MAX_ENTRIES) {
            // Finish to write file and create another one
            if (is_resource($this->_fp)) {
                $this->_output($this->_tpl_sitemap_footer());
                fclose($this->_fp);
            }
            $this->_entries_counter = 0;
            $this->_total_length = 0;
            // Check if we reached max sitemap files number
            if ($this->_sitemap_file_counter >= $this->MAX_SITEMAPS) {
                $this->LIMIT_REACHED = true;
                return false;
            }
            $this->_sitemap_file_counter++;
            // Create and open to write next sitemap file
            $this->_fp = fopen($this->SITEMAP_STORE_FOLDER . $this->SITEMAP_FILE_NAME . $this->_sitemap_file_counter . $this->_file_extension, 'w+');

            $header_text = $this->_tpl_sitemap_header();
            $this->_output($header_text);
            // The entry that caused the switch is written into the new file, so it is counted there
            $this->_total_length = strlen($header_text) + $entry_size;
            $this->_entries_counter = 1;
        }
        $this->_output($string);
    }

    /**
     * Output generated content.
     *
     * In TEST_MODE the content is echoed (HTML-escaped inside <pre> when
     * DEBUG_MODE is on) and nothing is written to disk. Otherwise the content
     * is appended to the currently opened sitemap file, if there is one.
     *
     * @param string $string Content to output
     * @return false|null False in TEST_MODE, null otherwise
     */
    public function _output($string)
    {
        if ($this->TEST_MODE) {
            $as_html = (bool) DEBUG_MODE;
            // This method is called once per chunk: after the first echo the
            // headers are gone and header() would only raise a warning
            if ( ! headers_sent()) {
                header('Content-type: ' . ($as_html ? 'text/html' : 'text/xml'));
            }
            echo $as_html ? '<pre>' . _prepare_html($string) . '</pre>' : $string;
            return false;
        }
        // is_resource() is false for a handle that was already closed, which
        // fwrite() would reject
        if (is_resource($this->_fp)) {
            fwrite($this->_fp, $string);
        }
    }

    /**
     * Serve the sitemap to the client.
     *
     * With DIRECT_FILE_OUTPUT the file behind $location is sent as text/xml
     * and the script is terminated. Otherwise a "301 Moved Permanently"
     * redirect to $location is issued. Nothing happens in TEST_MODE.
     *
     * @param string $location Web path (url) of the sitemap or sitemap index file
     * @return false|null False in TEST_MODE, null after sending the redirect headers
     */
    public function _redirect_sitemap($location)
    {
        if ($this->TEST_MODE) {
            return false;
        }
        if ($this->DIRECT_FILE_OUTPUT) {
            header('Content-type: text/xml');
            // Map the web path back to the file on disk
            readfile(str_replace($this->SITEMAP_WEB_PATH, $this->SITEMAP_STORE_FOLDER, $location));
            exit();
        }
        // SERVER_PROTOCOL is not set in every environment (e.g. CLI)
        $protocol = ! empty($_SERVER['SERVER_PROTOCOL']) ? $_SERVER['SERVER_PROTOCOL'] : 'HTTP/1.1';
        header($protocol . ' 301 Moved Permanently');
        header('Location: ' . $location);
    }

    /**
     * Notify Google about the update (only when NOTIFY_GOOGLE is enabled).
     *
     * The ping request carries $this->_notify_url as the sitemap address.
     * The response is not read; the connection is closed right away.
     *
     * NOTE(review): Google has announced the deprecation of this sitemaps
     * ping endpoint - confirm that the notification is still of any use.
     */
    public function _do_notify_google()
    {
        if ( ! $this->NOTIFY_GOOGLE) {
            return;
        }
        $ping_url = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=' . urlencode($this->_notify_url);
        $handle = fopen($ping_url, 'r');
        // Do not leave the stream open until the end of the request
        if (is_resource($handle)) {
            fclose($handle);
        }
    }

    /**
     * Format a point in time as an ISO 8601 date (PHP date format 'c').
     *
     * @param mixed $timestamp Unix timestamp (int or numeric string), a date
     *                         string understood by strtotime(), or a
     *                         DateTimeInterface object
     * @return string|false Formatted date, or false when the value can not be
     *                      interpreted as a date (e.g. a failed filemtime())
     */
    public function _iso8601_date($timestamp)
    {
        if ($timestamp instanceof DateTimeInterface) {
            return $timestamp->format('c');
        }
        if (is_string($timestamp) && ! is_numeric($timestamp)) {
            // Textual dates, e.g. '2020-01-02 03:04:05'; false when not parseable
            $timestamp = strtotime($timestamp);
        }
        // Rejects false, null and everything else that is not a number
        if ( ! is_numeric($timestamp)) {
            return false;
        }
        return date('c', (int) $timestamp);
    }

    /**
     * Post-process a generated sitemap file in place.
     *
     * Turns the urls inside <loc> tags into their final form (through the
     * rewrite class when ALLOW_REWRITE is on, otherwise by replacing a
     * leading './?' with WEB_PATH . '?') and escapes XML special chars in
     * them. The file is left untouched when it can not be read or processed.
     *
     * @param string $_path Filesystem path of the sitemap file
     * @return bool True when the processed content was written back, false otherwise
     */
    public function _process_sitemap_file($_path = '')
    {
        $text = file_get_contents($_path);
        if ($text === false) {
            // Writing back now would replace the file with empty content
            return false;
        }
        if ($this->ALLOW_REWRITE) {
            $RW = _class('rewrite');
            $RW->FORCE_NO_DEBUG = true;
            // Save old pattern
            $old_pattern = $RW->_links_pattern;
            // Apply our custom pattern
            $RW->_links_pattern = '/(<loc>)([^\<]+)(<\/loc>)/ms';
            // Process the text in pieces of at most REWRITE_SPLIT_LENGTH,
            // presumably cut at '<url>' boundaries - verify against common()->_my_split()
            $rewrited = '';
            foreach ((array) common()->_my_split($text, '<url>', $this->REWRITE_SPLIT_LENGTH) as $_cur_text) {
                $rewrited .= $RW->_rewrite_replace_links($_cur_text);
            }
            $text = $rewrited;
            unset($rewrited);
            // Revert old links pattern
            $RW->_links_pattern = $old_pattern;
        } else {
            $text = str_replace('<loc>./?', '<loc>' . WEB_PATH . '?', $text);
        }
        // Explicit flags: the sitemap protocol requires &, ', ", < and > to be
        // entity-escaped, while the default flags of htmlspecialchars() differ
        // between PHP versions (single quotes are not escaped before PHP 8.1)
        $escaped = preg_replace_callback('/<loc>([^\<]+)<\/loc>/ims', static function ($m) {
            return '<loc>' . htmlspecialchars($m[1], ENT_QUOTES | ENT_SUBSTITUTE | ENT_XML1, 'UTF-8') . '</loc>';
        }, $text);
        if ($escaped === null) {
            // PCRE error: keep the file as it is instead of truncating it
            return false;
        }
        return file_put_contents($_path, $escaped) !== false;
    }

    /**
     * Find modules that provide a sitemap hook.
     *
     * Scans the module class files of the framework, the project and the
     * application (including their plugins) and collects the modules whose
     * source code declares a function named like one of HOOK_NAMES. The
     * YF_PREFIX / YF_SITE_CLS_PREFIX class prefixes are stripped from the names.
     *
     * @return array Module names, as name => name, sorted by name
     */
    public function _get_modules_from_files()
    {
        $regex = '~function(\s)+(' . implode('|', $this->HOOK_NAMES) . ')\s*\(~ims';

        $yf_prefix_len = strlen(YF_PREFIX);
        $yf_cls_ext_len = strlen(YF_CLS_EXT);
        $site_prefix_len = strlen(YF_SITE_CLS_PREFIX);

        $pattern = USER_MODULES_DIR . '*' . YF_CLS_EXT;
        $places = [
            'yf_main' => YF_PATH . $pattern,
            'yf_plugins' => YF_PATH . 'plugins/*/' . $pattern,
            'project_main' => PROJECT_PATH . $pattern,
            'project_plugins' => PROJECT_PATH . 'plugins/*/' . $pattern,
            'app_main' => APP_PATH . $pattern,
            'app_plugins' => APP_PATH . 'plugins/*/' . $pattern,
        ];
        $modules = [];
        foreach ($places as $glob) {
            // glob() returns false on error (and, on some systems, when nothing matched)
            $paths = glob($glob);
            if (empty($paths)) {
                continue;
            }
            foreach ($paths as $path) {
                if (substr($path, -$yf_cls_ext_len) !== YF_CLS_EXT) {
                    continue;
                }
                $name = substr(basename($path), 0, -$yf_cls_ext_len);
                if (substr($name, 0, $yf_prefix_len) === YF_PREFIX) {
                    $name = substr($name, $yf_prefix_len);
                }
                if (substr($name, 0, $site_prefix_len) === YF_SITE_CLS_PREFIX) {
                    $name = substr($name, $site_prefix_len);
                }
                if ( ! strlen($name)) {
                    continue;
                }
                // Unreadable files are skipped
                $code = file_get_contents($path);
                if ($code === false || ! preg_match($regex, $code)) {
                    continue;
                }
                $modules[$name] = $name;
            }
        }
        ksort($modules);
        return $modules;
    }


    /**
     * Opening part of a sitemap file: XML declaration and the <urlset> tag.
     *
     * @return string Header text, every line terminated by PHP_EOL
     */
    public function _tpl_sitemap_header()
    {
        $lines = [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
            ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">',
        ];

        return implode(PHP_EOL, $lines) . PHP_EOL;
    }


    /**
     * Closing part of a sitemap file: the </urlset> tag on its own line.
     *
     * @return string Footer text
     */
    public function _tpl_sitemap_footer()
    {
        $closing_tag = '</urlset>';

        return implode('', [PHP_EOL, $closing_tag, PHP_EOL]);
    }


    /**
     * Opening part of a sitemap index file: XML declaration and the <sitemapindex> tag.
     *
     * @return string Header text, every line terminated by PHP_EOL
     */
    public function _tpl_sitemap_index_header()
    {
        $lines = [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
            ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd">',
        ];

        return implode(PHP_EOL, $lines) . PHP_EOL;
    }


    /**
     * Closing part of a sitemap index file: the </sitemapindex> tag on its own line.
     *
     * @return string Footer text
     */
    public function _tpl_sitemap_index_footer()
    {
        $closing_tag = '</sitemapindex>';

        return implode('', [PHP_EOL, $closing_tag, PHP_EOL]);
    }
}