openva/rs-video-processor

View on GitHub
bin/get_video.php

Summary

Maintainability
A
0 mins
Test Coverage
<?php

/*
 * Exit statuses reported to the calling shell:
 * 0 if successful
 * 1 if this one video fails, but there may be another
 * 2 if the queue is empty
 */

# INCLUDES
# Include any files or libraries that are necessary for this specific
# page to function.
include_once __DIR__ . '/../includes/settings.inc.php';
include_once __DIR__ . '/../includes/functions.inc.php';
include_once __DIR__ . '/../includes/vendor/autoload.php';

// Logger used throughout this script; the helper functions below reach it via `global $log`.
$log = new Log();

// The SQS queue that pending videos are read from and requeued to.
const SQS_URL = 'https://sqs.us-east-1.amazonaws.com/947603853016/rs-video-harvester.fifo';

// Our webroot, but for the CLI interface
const CLI_ROOT = '/home/ubuntu/video-processor/';

/*
 * Submit this video back to the queue. We run this if this process fails in any way.
 *
 * $message is the decoded video record (an object with at least url, chamber, type, and date
 * properties). It is JSON-encoded and sent to SQS_URL as a new message.
 */
function requeue($message)
{

    global $sqs_client;
    global $log;

    /*
     * Note that $message is deliberately NOT declared global here: doing so would replace the
     * argument with the script-level $message, which is the raw SQS envelope, not the video.
     */

    /*
     * We're getting some extra slashes added to URLs, rendering them invalid. Strip slashes before
     * requeuing the video.
     */
    if (isset($message->url)) {
        $message->url = stripslashes($message->url);
    }

    /*
     * Log this to SQS. A random deduplication ID keeps the FIFO queue from discarding the
     * message as a duplicate of the one we already consumed.
     */
    $sqs_client->sendMessage([
        'MessageGroupId'            => '1',
        'MessageDeduplicationId'    => (string) mt_rand(),
        'QueueUrl'                  => SQS_URL,
        'MessageBody'               => json_encode($message)
    ]);

    $log->put('Requeued ' . $message->chamber . ' ' . $message->type . ' video for '
        . $message->date . '.', 5);
}

/*
 * Remove a received message from the SQS queue.
 *
 * $message is the message as returned by SQS; only its 'ReceiptHandle' entry is used.
 */
function delete($message)
{
    global $sqs_client;

    // SQS identifies the message to remove by the receipt handle issued when it was received.
    $request = [
        'QueueUrl' => SQS_URL,
        'ReceiptHandle' => $message['ReceiptHandle'],
    ];

    $sqs_client->DeleteMessage($request);
}

/*
 * Create the S3 and SQS clients. Both share the same profile, keys, and region; only the API
 * version differs between the two services.
 *
 * NOTE(review): AWS SDK v3 normally expects key/secret nested under a 'credentials' option --
 * confirm whether these top-level entries have any effect alongside 'profile'.
 */
use Aws\S3\S3Client;
use Aws\Sqs\SqsClient;

$aws_shared_options = [
    'profile'   => 'default',
    'key'       => AWS_ACCESS_KEY,
    'secret'    => AWS_SECRET_KEY,
    'region'    => 'us-east-1',
];

$s3_client = new S3Client($aws_shared_options + ['version' => '2006-03-01']);

$sqs_client = new SqsClient($aws_shared_options + ['version' => '2012-11-05']);

/*
 * Query SQS for any available videos. Exits with status 2 when the queue has nothing for us,
 * and with status 1 when the request to SQS itself fails.
 */
try {
    $result = $sqs_client->ReceiveMessage([
        'QueueUrl' => SQS_URL,
    ]);

    /*
     * The result may carry no "Messages" entry at all when nothing is pending, so check that we
     * really have an array before counting it.
     */
    $messages = $result->get('Messages');
    if (!is_array($messages) || count($messages) === 0) {
        $log->put('No pending videos found in SQS.', 1);
        exit(2);
    }
    $message = current($messages);
} catch (\Aws\Exception\AwsException $e) {
    /*
     * The exception class is fully qualified because this file never imports it; an unqualified
     * name would resolve to a nonexistent global class and the catch would never match.
     */
    $log->put('Could not retrieve pending videos from SQS. Error reported: '
        . $e->getMessage(), 6);
    exit(1);
}

/*
 * Pull the video information out of the message body.
 */
$video = json_decode($message['Body']);

/*
 * A body that does not decode to an object can never be processed, no matter how many times it
 * is retried, so remove it from the queue rather than leaving it to be redelivered.
 */
if (!is_object($video)) {
    $log->put('Could not decode the video information in the SQS message body.', 5);
    delete($message);
    exit(1);
}

$log->put('Found video: ' . print_r($video, true), 5);

/*
 * Decline to process old videos, which the RSS feed coughs up sometimes. A date that
 * strtotime() cannot parse is let through rather than rejected.
 */
if (
    isset($video->date)
    && (bool) strtotime($video->date)
    && (substr($video->date, 0, 4) != SESSION_YEAR)
) {
    $log->put('Not processing video from ' . $video->date . ', because it’s too old.', 5);
    delete($message);
    exit(1);
}

/*
 * Decline to process videos with missing or invalid URLs, as can happen.
 */
if (!isset($video->url) || filter_var($video->url, FILTER_VALIDATE_URL) === false) {
    $log->put('Not processing video from ' . (isset($video->url) ? $video->url : '')
        . ', because that is not a valid URL.', 5);
    delete($message);
    exit(1);
}

/*
 * Delete this message from SQS. From here on, any failure that should be retried has to put the
 * video back on the queue explicitly.
 */
delete($message);

/*
 * Take as long as necessary to get the video and then store it.
 */
set_time_limit(0);

/*
 * Retrieve the file and store it locally. It may be a video or it may be just be a playlist in the
 * M3U format. Anything else is rejected here, because the rest of the script relies on both the
 * format and the filename being set.
 */
if (substr($video->url, -4) == '.mp4') {
    $video->format = 'mp4';
} elseif (substr($video->url, -5) == '.m3u8') {
    $video->format = 'm3u';
} else {
    $log->put('Not processing video from ' . $video->url . ', because it is neither an MP4 '
        . 'file nor an M3U8 playlist.', 5);
    exit(1);
}

/*
 * Both formats are stored under an .mp4 name.
 */
$video->filename = $video->chamber . '-' . $video->type . '-' . $video->date . '.mp4';

$fp = fopen('../video/' . $video->filename, 'w+');

/*
 * If we cannot write locally, there is no point in starting the transfer. Put the video back on
 * the queue so that it can be retried.
 */
if ($fp === false) {
    $log->put('Abandoning ' . $video->filename . ' because the local file could not be opened '
        . 'for writing.', 7);
    unset($video->filename);
    requeue($video);
    exit(1);
}

$ch = curl_init($video->url);
curl_setopt($ch, CURLOPT_FILE, $fp);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$result = curl_exec($ch);
$curl_error = curl_error($ch);
curl_close($ch);
fclose($fp);

/*
 * If the file transfer failed.
 */
if ($result == false || !file_exists('../video/' . $video->filename)) {
    $log->put('Abandoning ' . $video->filename . ' because it could not be retrieved from ' .
        $video->url . ' . cURL error: ' . $curl_error, 7);
    unset($video->filename);
    requeue($video);
    exit(1);
}

/*
 * If the file is less than 1 MB, we've gotten an HTML error page instead of video.
 */
if ($video->format == 'mp4' && filesize('../video/' . $video->filename) < 1048576) {
    $log->put('The ' . $video->chamber . ' ' . $video->type . ' video for ' . $video->date
        . ', at ' . $video->url . ' is returning HTML instead of video. Requeuing for later '
        . 'retrieval and analysis.', 7);
    unset($video->filename);
    requeue($video);
    exit(1);
}

/*
 * If it's a playlist, combine all of its components into an MP4.
 */
if ($video->format == 'm3u') {
    $log->put($video->filename . ' is a playlist, not a video. Converting to MP4.', 3);

    $mp4_filename = str_replace('.m3u8', '', $video->filename);

    /*
     * Iterate through every fragment and save it, stopping at the first one that cURL cannot
     * retrieve. Fragment numbers are zero-padded on the LEFT (media_0001.ts); str_pad() pads on
     * the right by default, which would give 1, 10, and 100 the same name.
     *
     * NOTE(review): the fragment URL is built from the local filename rather than from
     * $video->url, so it has no scheme or host -- confirm where the fragments actually live.
     */
    for ($i = 0; $i < 9999; $i++) {
        $segment_filename = 'media_' . str_pad($i, 4, '0', STR_PAD_LEFT) . '.ts';
        $url = $mp4_filename . '/' . $segment_filename;
        $fp = fopen('../video/' . $segment_filename, 'w+');
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FILE, $fp);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        $response = curl_exec($ch);
        curl_close($ch);
        fclose($fp);
        if ($response === false) {
            break;
        }
    }

    /*
     * $i is now either the index of the fragment that failed or, if none failed, the loop limit.
     * Computing the count here, rather than inside the loop, means it is always defined.
     * NOTE(review): subtracting 2 leaves the last successfully retrieved fragment out of the
     * manifest -- presumably deliberate, but confirm.
     */
    $num_files = $i - 2;

    /*
     * Put together a list of all of the fragments to feed to ffmpeg.
     */
    $manifest = [];
    for ($i = 0; $i <= $num_files; $i++) {
        $manifest[] = "file 'media_" . str_pad($i, 4, '0', STR_PAD_LEFT) . ".ts'";
    }
    $manifest = implode("\n", $manifest);
    file_put_contents('../video/manifest.txt', $manifest);

    /*
     * Combine all of the fragments into a single video. The output name is built from values in
     * the SQS message, so escape it before handing it to the shell.
     * NOTE(review): the output is written relative to the working directory, not to ../video/,
     * which is where the upload step reads from -- confirm.
     */
    $cmd = 'ffmpeg -f concat -i ../video/manifest.txt -codec copy -bsf:a aac_adtstoasc '
        . escapeshellarg($mp4_filename);
    exec($cmd, $output, $return_var);

    unlink('../video/manifest.txt');

    if ($return_var != 0) {
        $log->put('Error: Failed in M3U -> MP4 conversion of ' . $video->filename . ', with the '
            . 'following error: ' . implode(' ', $output), 4);
        exit(1);
    }

    /*
     * Now make the MP4 the filename, rather than the playlist.
     */
    $video->filename = $mp4_filename;
}

/*
 * Connect to the database.
 */
$database = new Database();
$db = $database->connect_mysqli();

/*
 * Get committee info. Committee videos need both the committee's ID and its shortname, which are
 * looked up from the chamber and the committee name given in the message.
 */
if ($video->type == 'committee') {
    $committee = new Committee();
    $committee->chamber = $video->chamber;
    $committee->name = $video->committee;
    $committee->id = $committee->get_id();
    $committee->info();
    if (!isset($committee->shortname) || !isset($committee->id)) {
        $log->put('Could not identify the committee shortname or ID for the committee named '
            . '"' . $video->committee . '" — abandoning ' . $video->filename . '.', 6);

        /*
         * Exit with a failure status. A bare die() exits with status 0, which this script
         * documents as success.
         */
        exit(1);
    }
    $video->committee_id = $committee->id;
    $video->committee_shortname = $committee->shortname;
}

/*
 * Copy the file to S3. Floor videos are keyed by chamber and date; committee videos also include
 * the committee's shortname in the key.
 */
if ($video->type == 'floor') {
    $s3_key = $video->chamber . '/' . 'floor/' . $video->date . '.mp4';
} elseif ($video->type == 'committee') {
    $s3_key = $video->chamber . '/' . 'committee/' . urlencode(strtolower($video->committee_shortname)) . '/' . $video->date . '.mp4';
} else {
    /*
     * Without a recognized type there is no key to upload to, so stop here rather than
     * continuing with an undefined key.
     */
    $log->put('Could not upload video ' . $video->filename . ' to S3, because "' . $video->type
        . '" is not a recognized video type.', 6);
    exit(1);
}
$s3_url = 'https://s3.amazonaws.com/video.richmondsunlight.com/' . $s3_key;

try {
    $result = $s3_client->putObject([
        'Bucket'     => 'video.richmondsunlight.com',
        'Key'        => $s3_key,
        'SourceFile' => '../video/' . $video->filename
    ]);

    /*
     * Block until S3 confirms that the object is there.
     */
    $s3_client->waitUntil('ObjectExists', [
        'Bucket' => 'video.richmondsunlight.com',
        'Key'    => $s3_key
    ]);
} catch (\Aws\S3\Exception\S3Exception $e) {
    /*
     * The exception class is fully qualified because this file never imports it; an unqualified
     * name would resolve to a nonexistent global class and the catch would never match.
     */
    $log->put('Could not upload video ' . $video->filename . ' to S3. Error reported: '
        . $e->getMessage(), 6);

    /*
     * Exit with a failure status. A bare die() exits with status 0, which this script
     * documents as success.
     */
    exit(1);
}
$log->put('Saved ' . $video->filename . ' to S3.', 3);

/*
 * Describe this video in a JSON file that later stages of the processing pipeline read.
 * Every value has to be a string; otherwise jq will not turn them into environment variables
 * correctly. The hyphenated date is cut out of the date by position: four characters, then
 * two, then two.
 */
$metadata = [
    'filename'     => $video->filename,
    'date'         => (string)$video->date,
    'date_hyphens' => implode('-', [
        substr($video->date, 0, 4),
        substr($video->date, 4, 2),
        substr($video->date, 6, 2),
    ]),
    's3_url'       => $s3_url,
    'chamber'      => $video->chamber,
    'type'         => $video->type,
];

// Only committee videos carry a committee name.
if ($video->type == 'committee') {
    $metadata += ['committee' => $video->committee];
}

file_put_contents('../video/metadata.json', json_encode($metadata));

/*
 * Hand the downloaded file to the Video class so that it can extract data about the file. A
 * failure here is logged but does not stop the script.
 */
$video_handler = new Video();
$video_handler->path = 'video/' . $video->filename;
$video_handler->video = (array) $video;
if (!$video_handler->extract_file_data()) {
    $log->put('Error: Failed to extract file data about ' . $video_handler->path, 5);
}

/*
 * Fill in the remaining fields from the metadata: the S3 URL becomes the path, and the date is
 * replaced with its hyphenated form.
 */
$video->path = $metadata['s3_url'];
$video->date = $metadata['date_hyphens'];

/*
 * Saving the video to the database is currently disabled; the code is retained below.
 */
/*foreach ((array) $video as $key => $value) {
    $video_handler->video[$key] = $value;
}
if ($video_handler->submit() == false) {
    $log->put('The ' . ucfirst($video->chamber) . ' ' . $video->type . ' video for '
        . date('M d, Y', strtotime($video->date)) . ' could not be saved to the database.', 5);
}*/

$log->put(
    sprintf(
        'Stored new %s %s video, for %s: %s',
        ucfirst($video->chamber),
        $video->type,
        date('M d, Y', strtotime($video->date)),
        $video->path
    ),
    4
);

// The Bash handler captures this script's output to learn the filename.
echo $video->filename;