IET-OU/open-media-player

View on GitHub
application/models/podcast_items_feed_model.php

Summary

Maintainability
F
4 days
Test Coverage
<?php
/**
 * Part of Open Media Player.
 *
 * @license   http://gnu.org/licenses/gpl.html GPL-3.0+
 * @copyright Copyright 2011-2015 The Open University (IET) and contributors.
 * @link      http://iet-ou.github.io/open-media-player
 */

/**
 * A model to get item meta-data from a podcast RSS feed.
 *
 * @copyright Copyright 2011 The Open University.
 * @author Jamie Daniels <j.d.daniels @ open.ac.uk>, May-July 2011.
 * @author Nick Freear, March 2012.
 */
require_once 'podcast_items_abstract_model.php';


class Podcast_items_feed_model extends Podcast_items_abstract_model
{

    const COLLECTION_ID = '__COLLECTION_ID__';


    public function __construct()
    {
        parent::__construct();

        $this->http = $this->load->library('\\IET_OU\\Open_Media_Player\\Http');
    }

    /** Test OU Podcast feed configuration and shortcode, and return the feed URL.
    * @return string
    */
    protected function _feed_url($basename, $shortcode)
    {
        $url_pattern = $this->config->item('podcast_feed_url_pattern');
        $locate = "\$config[podcast_feed_url_pattern] in 'config/embed_config.php'";

        if (! $url_pattern) {
            //ERROR 503.
            $this->_error("Missing or empty $locate", 503.1);
        }
        if (false !== strpos($url_pattern, 'example.org')) {
            $this->_error("Invalid host in $locate", 503.2);
        }
        if (false === strpos($url_pattern, self::COLLECTION_ID)) {
            $this->_error("Missing {". self::COLLECTION_ID. "} in $locate", 503.3);
        }
        $purl = parse_url($url_pattern);
        if (!isset($purl['scheme']) || !isset($purl['host']) || !isset($purl['path'])) {
            $this->_error("Invalid URL in $locate", 503.4);
        }

        if (strlen($shortcode) != OUP_PODCAST_SHORTCODE_SIZE) {
            //ERROR 400..
            $this->_error('Sorry, I expect a '. OUP_PODCAST_SHORTCODE_SIZE .' character shortcode', 400.1);
        }
        if (! preg_match(OUP_PODCAST_SHORTCODE_REGEX, $shortcode)) {
            $this->_error('Sorry, unexpected characters in shortcode.', 400.2);
        }


        return str_replace(self::COLLECTION_ID, $basename, $url_pattern);

     /*
     $podcast_feed_file = $this->config->item('podcast_feed_file');
     if (! $podcast_feed_file) {
      //ERROR 503.
      $this->_error("(feed error) \$config[podcast_feed_file] appears to be empty in 'config/embed_config.php'", 503);
     }
     $pod_base = Oupodcast_serv::POD_BASE;
     $url = $pod_base."/feeds/$basename/". $podcast_feed_file;
     */
    }


    public function get_item($basename, $shortcode = null, $captions = false)
    {

        $url = $this->_feed_url($basename, $shortcode);

        // Set a cookie (intranet-restricted content), and don't spoof. [iet-it-bugs:1463][Bug: #1]
        $result = $this->http->request($url, $spoof = false /*, array('cookie' => 'pod.auth=PASSED')*/);
        if (! $result->success) {
            //ERROR.
            $_data = $result->data;
            $result->data = $_data ? substr($_data, 0, 240) .' ... ' : $_data;
            $this->CI->_debug($result);

            if ($result->info['http_code'] == 404) {
                $this->_error(
                    "Podcast data collection deleted or not found.",
                    404.1,
                    null,
                    array('url'=>$url)
                );
            }
            // Other errors:
            $this->_error("Podcast data unknown error.", $result->info['http_code'], null, $result);
        }

        $this->_xmlo = $xmlo = @simplexml_load_string($result->data);

        if (! $xmlo) {
            //ERROR: this can be caused by network errors, or invalid XML.
            $this->CI->_debug($url);
            $this->CI->_debug(substr($result->data, 0, 122) .' ... ');
            $this->_error(
                "Podcast data XML error.",
                400.3,
                null,
                array('url'=>$url)
            );
        }

        foreach ($this->_xml_namespaces() as $prefix => $ns_uri) {
            $bok = $xmlo->registerXPathNamespace($prefix, $ns_uri);
        }

        $xpath_url = '//atom:link[contains(@href, "pod/'. $basename .'/#!'. $shortcode .'")]';
        $xpath_item = $xpath_url .'/..';
        $item = $xmlo->xpath($xpath_item);

        if (isset($item[0])) {
            $item = $item[0];
        } else {
            //ERROR.
            $this->_error("Podcast data item not found.", 404.2);
            return false;
        }

        // Access control.
        $access = array(
        #Item-level: (bool) cast.
            'published_flag' => $this->_xpath_val($xpath_item .'/oup:published_flag'),
        #Podcast-level
        //'intranet_only' => $this->_xpath_val('//oup:restrict_access'),
            'intranet_only' => $this->_xpath_val('//oup:intranet_only'),
            'private' => $this->_xpath_val('//oup:private'),
            'deleted' => $this->_xpath_val('//oup:deleted'),

       /*'_published_flag' => 'yes' != $this->_xpath_val('//app:draft'),
       '_intranet_only' => count( $xmlo->xpath('//yt:state[@reasonCode="oup:intranet_only"]') ),
       '_private' => count( $xmlo->xpath('//yt:private') ),
       '_deleted' => count( $xmlo->xpath('//yt:state[@name="deleted"]') ),
       '_processing' => count( $xmlo->xpath('//yt:state[@name="processing"]') ),
       '_list_allowed' => count( $xmlo->xpath('//yt:accessControl[@action="list"][@permission="allowed"]') ),
       */

            '_embed_allowed' => null,
        );
        $access['published'] = $access['published_flag'];

        $vars = array(
          'title' => (string) $xmlo->channel->title .': '. $item->title,
          'pod_title' => (string) $xmlo->channel->title,
          'summary' => (string) $xmlo->channel->description,
          '_copyright' => (string) $xmlo->channel->copyright,

          'item_title' => (string) $item->title,
          'item_summary' => (string) $item->description,
          'timestamp' => strtotime((string) $item->pubDate),  #?
          'media_url' => (string) $item->enclosure->attributes()->url,
          'media_length' => (string) $item->enclosure->attributes()->length,
          'media_html5' => true,
          'mime_type' => (string) $item->enclosure->attributes()->type,
          'poster_url' => $this->_xpath_val($xpath_item .'/media:thumbnail', 'url'),

          'transcript_url' => $this->_xpath_val($xpath_item .'/atom:link[@type="application/pdf"]', 'href'),
          'transcript_html_url' => $this->_xpath_val($xpath_item .'/atom:link[@type="text/html"][@rel="oup:transcript"]', 'href'),
          'caption_url' => $this->_xpath_val($xpath_item .'/atom:link[@type="application/ttml+xml"]', 'href'), #?

          '_track_md5' => $shortcode,
          '_album_id' => $basename,
          'provider_mid' => "$basename/$shortcode", #?
          'url' => $this->_xpath_val($xpath_item .'/atom:link[@rel="oup:longlink"]', 'href'),
          '_short_url' => $this->_xpath_val($xpath_url, 'href'),
          // Iframe - see Oupodcast_serv::_post_process()
          'iframe_url' => null,

          'link' => $this->_xpath_val('//channel/atom:link[@rel="related"]', 'href'),
          'link_text' => $this->_xpath_val('//channel/atom:link[@rel="related"]', 'title'),
          'target_url' => $this->_xpath_val($xpath_item .'/atom:link[@rel="related"]', 'href'),
          'target_text' => $this->_xpath_val($xpath_item .'/atom:link[@rel="related"]', 'title'),

          '_itunes_url' => $this->_xpath_val('//atom:link[@rel="oup:ituneslink"]', 'href'), #oup:rel-itunes-url
          '_youtube_url' => $this->_xpath_val($xpath_item .'/atom:content[contains(@src, "youtube.com")]', 'src'), #'), #[@type="application/x-shockwave-flash"]
          'duration_orig' => $this->_xpath_val($xpath_item .'/itunes:duration'),
          'duration' => 0,
          'aspect_ratio' => $this->_xpath_val($xpath_item .'/oup:aspect_ratio'),

          '_access' => $access,

          #'_' => $item,
        );

        // Post process the duration, youtube URL etc.
        $duration = $vars['duration_orig'];
        if (preg_match('/\d{2}:\d{2}:\d{2}/', $duration)) {
            $vars['duration'] = strtotime("1970-01-01T".$vars['duration_orig']. "GMT");
        }

        $vars['media_type'] = substr($vars['mime_type'], 0, strpos($vars['mime_type'], '/'));

        if ($vars['_youtube_url']) {
            $vars['youtube_id'] = preg_replace(array('#^.+v\/#', '#\?.*$#'), '', $vars['_youtube_url']);
        }

        return (object) $vars;
    }



    /** Safely get an element or attribute value for SimpleXML.
    * @return string
    */
    protected function _xpath_val($expr, $attr = null)
    {
        $reso = $this->_xmlo->xpath($expr);
        if (isset($reso[0])) {
            if ($attr) {
                return (string) $reso[0]->attributes()->{$attr}[0];
            }
            return (string) $reso[0];
        }
        return false;
    }

    /** Return all of the XML namespaces, and prefixes used in OU podcast RSS.
    * @return array
    */
    protected function _xml_namespaces()
    {
        $NAMESPACES = array(
        # http://validator.w3.org/feed/docs/howto/declare_namespaces.html
        # http://w3.org/TR/html5/namespaces.html
        /*'fn' => 'http://www.w3.org/2005/xpath-functions',
        'xmlns' => 'http://www.w3.org/2000/xmlns/',
        'xml' => 'http://www.w3.org/XML/1998/namespace',
        'html' => 'http://www.w3.org/1999/xhtml',
        'rss2' => 'http://backend.userland.com/rss2',*/

            'atom' => 'http://www.w3.org/2005/Atom',
            'app' => 'http://www.w3.org/2007/app',

            'itunes' => 'http://www.itunes.com/dtds/podcast-1.0.dtd',
            'itunesu' => 'http://www.itunesu.com/feed',
            'media' => 'http://search.yahoo.com/mrss/',

            'gd' => 'http://schemas.google.com/g/2005',
            'yt' => 'http://gdata.youtube.com/schemas/2007',
            'openSearch' => 'http://a9.com/-/spec/opensearch/1.1/',

            's' => 'http://purl.org/steeple',
            'oup' => 'http://podcast.open.ac.uk/2012',
            'oupod_' => 'http://purl.org/net/oupod/',
        );
        return $NAMESPACES;
    }


    // function to check to see if the file exists at the destination url
    protected function url_exists($url)
    {
        $url = str_replace("http://", "", $url);
        if (strstr($url, "/")) {
            $url = explode("/", $url, 2);
            $url[1] = "/".$url[1];
        } else {
            $url = array($url, "/");
        }

        $fhand = fsockopen($url[0], 80);
        if ($fhand) {
            fputs($fhand, "GET ".$url[1]." HTTP/1.1\nHost:".$url[0]."\n\n");
            if (fread($fhand, 22) == "HTTP/1.1 404 Not Found") {
                return false;
            } else {
                return true;
            }

        } else {
            return false;
        }
    }

    /**
    * xml2array
    * lz_speedy at web dot de 30-Dec-2008 10:20
    * http://www.php.net/manual/en/function.xml-parse.php#87920
    */
    protected function xml2array($contents, $get_attributes = 1, $priority = 'tag')
    {
        if (!$contents) {
            return array();
        }

        if (!function_exists('xml_parser_create')) {
            //print "'xml_parser_create()' function not found!";
            return array();
        }

     //Get the XML parser of PHP - PHP must have this module for the parser to work
        $parser = xml_parser_create('');
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, "UTF-8"); # http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 1);
        xml_parse_into_struct($parser, trim($contents), $xml_values);
        xml_parser_free($parser);

        if (!$xml_values) {
            return;//Hmm...
        }
        // Initializations
        $xml_array = array();
        $parents = array();
        $opened_tags = array();
        $arr = array();

        $current = &$xml_array; //Refference

        // Go through the tags.
        $repeated_tag_index = array();//Multiple tags with same name will be turned into an array
        foreach ($xml_values as $data) {
            unset($attributes,$value);//Remove existing values, or there will be trouble

         // This command will extract these variables into the foreach scope
         // tag(string), type(string), level(int), attributes(array).
            extract($data);//We could use the array by itself, but this cooler.

            $result = array();
            $attributes_data = array();

            if (isset($value)) {
                if ($priority == 'tag') {
                    $result = $value;
                } else {
                    $result['value'] = $value; //Put the value in a assoc array if we are in the 'Attribute' mode
                }
            }

            // Set the attributes too.
            if (isset($attributes) && $get_attributes) {
                foreach ($attributes as $attr => $val) {
                    if ($priority == 'tag') {
                        $attributes_data[$attr] = $val;
                    } else {
                        $result['attr'][$attr] = $val; //Set all the attributes in a array called 'attr'
                    }
                }
            }

            // See tag status and do the needed.
            if ($type == "open") {
                // The starting of the tag '<tag>'
                $parent[$level-1] = &$current;
                if (!is_array($current) || (!in_array($tag, array_keys($current)))) {
                    // Insert New tag
                    $current[$tag] = $result;
                    if ($attributes_data) {
                        $current[$tag. '_attr'] = $attributes_data;
                    }
                    $repeated_tag_index[$tag.'_'.$level] = 1;

                    $current = &$current[$tag];

                } else {
                    // There was another element with the same tag name

                    if (isset($current[$tag][0])) {
                        //If there is a 0th element it is already an array
                        $current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result;
                        $repeated_tag_index[$tag.'_'.$level]++;
                    } else {
              //This section will make the value an array if multiple tags with the same name appear together
                        $current[$tag] = array($current[$tag],$result);//This will combine the existing item and the new item together to make an array
                        $repeated_tag_index[$tag.'_'.$level] = 2;

                        if (isset($current[$tag.'_attr'])) {
               //The attribute of the last(0th) tag must be moved as well
                            $current[$tag]['0_attr'] = $current[$tag.'_attr'];
                            unset($current[$tag.'_attr']);
                        }

                    }
                    $last_item_index = $repeated_tag_index[$tag.'_'.$level]-1;
                    $current = &$current[$tag][$last_item_index];
                }

            } elseif ($type == "complete") {
                // Tags that ends in 1 line '<tag />'
                // See if the key is already taken.
                if (!isset($current[$tag])) {
                    // New Key
                    $current[$tag] = $result;
                    $repeated_tag_index[$tag.'_'.$level] = 1;
                    if ($priority == 'tag' && $attributes_data) {
                        $current[$tag. '_attr'] = $attributes_data;
                    }

                } else {
                    // If taken, put all things inside a list(array)
                    if (isset($current[$tag][0]) && is_array($current[$tag])) {
                        // If it is already an array...

                        // ...push the new element into that array.
                        $current[$tag][$repeated_tag_index[$tag.'_'.$level]] = $result;

                        if ($priority == 'tag' && $get_attributes && $attributes_data) {
                            $current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data;
                        }
                        $repeated_tag_index[$tag.'_'.$level]++;

                    } else {
                        // If it is not an array...
                        $current[$tag] = array($current[$tag],$result); //...Make it an array using using the existing value and the new value
                        $repeated_tag_index[$tag.'_'.$level] = 1;
                        if ($priority == 'tag' && $get_attributes) {
                            if (isset($current[$tag.'_attr'])) {
                  //The attribute of the last(0th) tag must be moved as well

                                $current[$tag]['0_attr'] = $current[$tag.'_attr'];
                                unset($current[$tag.'_attr']);
                            }

                            if ($attributes_data) {
                                $current[$tag][$repeated_tag_index[$tag.'_'.$level] . '_attr'] = $attributes_data;
                            }
                        }
                        $repeated_tag_index[$tag.'_'.$level]++; //0 and 1 index is already taken
                    }
                }

            } elseif ($type == 'close') {
                // End of tag '</tag>'
                $current = &$parent[$level-1];
            }
        }

        return ($xml_array);
    }

    /**
    * @author Jamie Daniels.
    */
    public function get_item_v1($basename, $shortcode = null, $captions = false)
    {

        $pod_base = Oupodcast_serv::POD_BASE;
        $file=$pod_base."/feeds/$basename/".config_item('podcast_feed_file');

        // Confirm RSS file containing player data exists
        if (file($file)) {
            $rawrssdata =  $this->xml2array(file_get_contents($file));
        } else {
            echo "<p>This media item is currently unavailable. It will appear here once it has been transcoded.</p> <!--$file does not exist.--> ";
            die();
        }
        // Confirm the RSS file has item elements
        try {
            if (array_key_exists('item', $rawrssdata['rss']['channel'])) {
             //            require_once('dBug.php');
             // Loop round items looking for the one that matches the shortcode

             // count how many items there are
                if (!isset($rawrssdata['rss']['channel']['item'][0])) {
                    $rssdata[0]=$rawrssdata['rss']['channel']['item'];
                } else {
                    $rssdata = $rawrssdata['rss']['channel']['item'];
                }

                foreach ($rssdata as $rssitem) {
                    // BH 20111102 - added test to check $rssitem['atom:link_attr']['href'] exists before trying to explode it
                    if (isset($rssitem['atom:link_attr']['href'])) {
                        $explodedshortcodeurl=explode('#', $rssitem['atom:link_attr']['href']);
                    // If the shortcode is found populate variables with the data from that item.
                        if ($explodedshortcodeurl[1]==$shortcode) {
                            $rssshortcode=$explodedshortcodeurl[1];
                            $rsstitle=$rssitem['title'];
                            $rssdesc=$rssitem['description'];
                            $rssduration=$rssitem['itunes:duration'];
                            $rsspubdate=$rssitem['pubDate'];

                            $rssmediafilename=explode('/', $rssitem['guid']);
                            $rssmediafilename=end($rssmediafilename);
                            if (!$this->url_exists($pod_base."/feeds/$basename/$rssmediafilename")) {
                                echo "<p>Sorry this video is currently unavailable. <!--$rssmediafilename does not exist, it may still be transcoding --></p>";
                                die();
                            }

                            $rssitemimage=explode('/', $rssitem['media:thumbnail_attr']['url']);
                            $rssitemimage=end($rssitemimage);  // poster image
                            if (!$this->url_exists($pod_base."/feeds/$basename/$rssitemimage")) {
                             // Media Thumbnail doesn't seem to exist so fall back to use the podcast thumb, if that exists
                                $rsspodcastimage=end(explode('/', $rawrssdata['rss']['channel']['image']['url']));

                                if ($this->url_exists($pod_base."/feeds/$basename/".$rsspodcastimage)) {
                                 // we found a podcast level thumkbnail so use that oiver the default.
                                    $rssitemimage=$rsspodcastimage;
                                } else {
                                 // There is no podcast thumbnail either so we have to use the default thumb image.
                                    $rssitemimage="../default-project-thumbnail.png";
                                }

                            }
                        }
                    }
                }

            }

        } catch (Exception $e) {
            echo 'ERROR: ' . $e->getMessage();
            echo "<p>Sorry this video is currently unavailable.</p>  <!--$file exists but does not contain any items. -->";
            die();
        }


     // The data to return.  Some of it hardcoded as its not present in the RSS.
        $result = (object) array(
            'pod_title'=>$rsstitle,
            'pod_summary'=> $rssdesc,
            'podcast_id'=> '',
            'duration'=> $rssduration,
            'custom_id' => $basename,
            'shortcode' => $rssshortcode,
            'filename' => $rssmediafilename,
            'image' => $rssitemimage,    //image filename as is
            'image_filename' => '',         // image filename that it will add .jpg to
            'private' => 'N',
            'intranet_only' => 'N',
            'published' => 'Y',
            'deleted' => 'N',
            'preferred_url' => '',
            'target_url_text' => '',
            'target_url' => '',
            'itunes_u_url' => '',
            'aspect_ratio' => '',
            'keywords' => '',
            'title' => $rsstitle,
            'publication_date' => $rsspubdate,
            'copyright' => '',
            'source_media' => '',
            'published_flag' => '',
            'pim_type' => 'desktop-all'
        );

        if ($result) {
            return $result;
        }
        return false;
    }
}