Godzil/Crunchy

View on GitHub
src/episode.ts

Summary

Maintainability
B
4 hrs
Test Coverage
'use strict';
import cheerio = require('cheerio');
import fs = require('fs');
import mkdirp = require('mkdirp');
import my_request = require('./my_request');
import path = require('path');
import subtitle from './subtitle/index';
import vlos from './vlos';
import video from './video/index';
import xml2js = require('xml2js');
import log  = require('./log');

/**
 * Downloads one episode to disk.
 *
 * The episode page is scraped first. When the page already carries its media
 * description the download starts right away; otherwise the legacy player
 * configuration is scraped and handed to the downloader as well.
 *
 * `done` receives the error (if any) and a flag telling whether the episode
 * was ignored instead of downloaded.
 */
export default function(config: IConfig, address: string, done: (err: Error, ign: boolean) => void)
{
  scrapePage(config, address, (pageError, page) =>
  {
    if (pageError)
    {
      return done(pageError, false);
    }

    if (page.media == null)
    {
      /* Legacy flow: the player configuration has to be fetched separately */
      scrapePlayer(config, address, page.id, (playerError, player) =>
      {
        if (playerError)
        {
          return done(playerError, false);
        }

        download(config, page, player, done);
      });

      return;
    }

    /* The page describes its own media: there is no player to scrape */
    download(config, page, null, done);
  });
}

/**
 * Reports the end of a download.
 *
 * Displays `message` for the episode followed by the time elapsed since
 * `begin` (a `Date.now()` timestamp) formatted as `hh:mm:ss`, then calls
 * `done` with no error and the "ignored" flag cleared.
 */
function complete(epName: string, message: string, begin: number, done: (err: Error, ign: boolean) => void)
{
  const elapsedMs = Date.now() - begin;

  /* Hours are not wrapped, minutes and seconds are */
  const clock = [
    Math.floor(elapsedMs / 1000 / 60 / 60),
    Math.floor(elapsedMs / 1000 / 60) % 60,
    Math.floor(elapsedMs / 1000) % 60,
  ].map((part) => prefix(part, 2)).join(':');

  log.dispEpisode(epName, message + ' (' + clock + ')', true);

  done(null, false);
}

/**
 * Tells whether something exists at the given path.
 *
 * The path is probed with a synchronous stat; any failure of that call is
 * reported as "does not exist".
 */
function fileExist(path: string)
{
  let found = true;

  try
  {
    fs.statSync(path);
  }
  catch (statError)
  {
    found = false;
  }

  return found;
}

/**
 * Makes a string usable as a file name.
 *
 * Slashes, backslashes, quotes, colons, dots and the other characters listed
 * in the first pattern are replaced by an underscore. Afterwards every
 * `{DIR_SEPARATOR}` placeholder is turned into a `/`, which is the only way
 * for a path separator to survive.
 */
function sanitiseFileName(str: string)
{
  return str
    .replace(/[\/':\?\*"<>\\\.\|]/g, '_')
    .replace(/{DIR_SEPARATOR}/g, '/');
}

/**
 * Downloads the subtitle and video of an episode, then merges them.
 *
 * Steps, in order:
 *  - build the target path from the output folder, the series folder and the
 *    generated file name;
 *  - when a '.mkv' already exists at that path, either stop (rebuild mode) or
 *    pick the first free '-1', '-2', ... suffixed name and store it in
 *    `page.filename`;
 *  - in rebuild mode, stop here and report the episode as ignored;
 *  - create the target folder, fetch the subtitles, fetch the video and,
 *    unless `config.merge` is set, merge both.
 *
 * `player` is only read when `page.media` is null (legacy flow).
 * `done` receives the error (if any) and whether the episode was ignored.
 */
function download(config: IConfig, page: IEpisodePage, player: IEpisodePlayer, done: (err: Error | string, ign: boolean) => void)
{
  const serieFolder = sanitiseFileName(config.series || page.series);
  const nameFor = (suffix?: string) => sanitiseFileName(generateName(config, page, suffix));
  const pathFor = (name: string) => path.join(config.output || process.cwd(), serieFolder, name);

  let fileName = nameFor();
  let filePath = pathFor(fileName);

  if (fileExist(filePath + '.mkv'))
  {
    if (config.rebuildcrp)
    {
      log.warn('Adding \'' + fileName + '\' to the DB...');
      return done(null, false);
    }

    log.warn('File \'' + fileName + '\' already exist...');

    /* Try '-1', '-2', ... until a name that is not taken is found */
    for (let attempt = 1; ; attempt++)
    {
      fileName = nameFor('-' + attempt);
      filePath = pathFor(fileName);

      if (!fileExist(filePath + '.mkv'))
      {
        break;
      }
    }

    log.warn('Renaming to \'' + fileName + '\'...');

    page.filename = fileName;
  }

  if (config.rebuildcrp)
  {
    log.warn('Ignoring \'' + fileName + '\' as it does not exist...');
    return done(null, true);
  }

  if (!mkdirp(path.dirname(filePath)))
  {
    log.dispEpisode(fileName, 'Error creating folder \'' + filePath + '\'...', true);
    return done('Cannot create folder', false);
  }

  /* Shared failure path of every asynchronous step below */
  const fail = (err: Error | string) =>
  {
    log.dispEpisode(fileName, 'Error...', true);
    return done(err, false);
  };

  /* Last step: merge the downloaded tracks and report completion */
  const mergeTracks = (startedAt: number) =>
  {
    const legacy = page.media === null;
    const isSubtitled = legacy ? Boolean(player.subtitle) : page.media.subtitles.length !== 0;
    const videoExt = (legacy && player.video.mode === 'RTMP') ? path.extname(player.video.file) : '.mp4';

    log.dispEpisode(fileName, 'Merging...', false);
    video.merge(config, isSubtitled, videoExt, filePath, config.verbose, (errVM) =>
    {
      if (errVM)
      {
        return fail(errVM);
      }

      complete(fileName, 'Finished!', startedAt, done);
    });
  };

  log.dispEpisode(fileName, 'Fetching...', false);
  downloadSubtitle(config, page, player, filePath, (errDS) =>
  {
    if (errDS)
    {
      return fail(errDS);
    }

    const now = Date.now();

    /* A legacy player without a video file means the episode is not out yet.
       Do they still create page in advance for unreleased episodes? */
    const released = (page.media !== null) || (player.video.file !== undefined);

    if (!released)
    {
      log.dispEpisode(fileName, 'Ignoring: not released yet', true);
      done(null, true);
      return;
    }

    log.dispEpisode(fileName, 'Fetching video...', false);
    downloadVideo(config, page, player, filePath, (errDV) =>
    {
      if (errDV)
      {
        return fail(errDV);
      }

      /* When config.merge is set the merge step is skipped */
      if (config.merge)
      {
        return complete(fileName, 'Finished!', now, done);
      }

      mergeTracks(now);
    });
  });
}

/**
 * Saves the subtitles to disk.
 *
 * When the page carries its own media description, the first 'ass' subtitle
 * matching the requested languages is fetched and written to
 * `filePath + '.ass'`. Languages are tried in the order given by
 * `config.sublang`. Otherwise the encrypted subtitle of the legacy player is
 * decoded, converted to `config.format` ('ass' when that format is unknown)
 * and written to `filePath + '.' + format`.
 *
 * `done` is called exactly once on every path:
 *  - without argument when there is nothing to download or on success;
 *  - with an error when fetching, decoding, converting or writing fails, or
 *    when no subtitle matches the requested languages (including the case
 *    where no language was requested at all).
 */
function downloadSubtitle(config: IConfig, page: IEpisodePage, player: IEpisodePlayer,
                          filePath: string, done: (err?: Error | string) => void)
{
  if (page.media !== null)
  {
    const subs = page.media.subtitles;
    if (subs.length === 0)
    {
      /* No downloadable subtitles */
      log.warn('Can\'t find subtitle ?!');
      return done();
    }

    /* An absent language list is handled like an empty one */
    const wantedLanguages = config.sublang || [];
    let chosenUrl: string | undefined;
    let found = false;

    /* Find a proper subtitle: the first requested language wins */
    for (let j = 0; j < wantedLanguages.length && !found; j++)
    {
      for (let i = 0; i < subs.length; i++)
      {
        const curSub = subs[i];
        if (curSub.format === 'ass' && curSub.language === wantedLanguages[j])
        {
          chosenUrl = curSub.url;
          found = true;
          break;
        }
      }
    }

    /* An explicit flag (rather than a loop index) guarantees that the
       callback also fires when the language list is empty */
    if (!found)
    {
      return done('Cannot find subtitles with requested language(s)');
    }

    my_request.get(config, chosenUrl, (err, result) =>
    {
      if (err)
      {
        log.error('An error occured while fetching subtitles...');
        return done(err);
      }

      /* The byte order mark is written in front of the subtitle text */
      fs.writeFile(filePath + '.ass', '\ufeff' + result, done);
    });
  }
  else
  {
    const enc = player.subtitle;

    if (!enc)
    {
      return done();
    }

    subtitle.decode(enc.id, enc.iv, enc.data, (errSD, data) =>
    {
      if (errSD)
      {
        log.error('An error occured while getting subtitles...');
        return done(errSD);
      }

      if (config.debug)
      {
        log.dumpToDebug('SubtitlesXML', data);
      }

      const formats = subtitle.formats;
      const format = formats[config.format] ? config.format : 'ass';

      formats[format](config, data, (errF: Error, decodedSubtitle: string) =>
      {
        if (errF)
        {
          return done(errF);
        }

        fs.writeFile(filePath + '.' + format, '\ufeff' + decodedSubtitle, done);
      });
    });
  }
}

/**
 * Streams the video to disk.
 *
 * With a legacy player, the stream described by `player.video` is used.
 * Without one, the first stream of `page.media.streams` that is an adaptive
 * HLS stream with 'jaJP' audio and no hardsub is streamed as 'mp4'; `done`
 * receives an error message when no such stream is listed.
 */
function downloadVideo(config: IConfig,  page: IEpisodePage, player: IEpisodePlayer,
                       filePath: string, done: (err: any) => void)
{
  if (player != null)
  {
    /* Legacy flow: everything comes from the player configuration */
    video.stream(player.video.host, player.video.file, page.swf, filePath,
                 path.extname(player.video.file), player.video.mode, config.verbose, done);
    return;
  }

  const candidates = page.media.streams;
  let index = 0;

  /* Find a proper stream */
  while (index < candidates.length)
  {
    const candidate = candidates[index];
    const usable = candidate.format === 'vo_adaptive_hls'
      && candidate.audio_lang === 'jaJP'
      && candidate.hardsub_lang === null;

    if (usable)
    {
      video.stream('', candidate.url, '', filePath,
        'mp4', 'HLS', config.verbose, done);
      break;
    }

    index++;
  }

  /* The whole list was walked without finding a usable stream */
  if (index >= candidates.length)
  {
    done('Cannot find a valid stream');
  }
}

/**
 * Names the file based on the config, page, series and tag.
 *
 * Every known `{TOKEN}` of `config.nametmpl` is replaced by its value and
 * `extra` is appended to the result. Known tokens are EPISODE_ID,
 * EPISODE_NUMBER, SEASON_NUMBER / VOLUME_NUMBER, SEASON_TITLE / VOLUME_TITLE,
 * SERIES_TITLE, EPISODE_TITLE and TAG. Unknown tokens (such as
 * `{DIR_SEPARATOR}`) are left untouched.
 *
 * Episode and volume numbers below 10 get a leading '0'. `config.series` and
 * `config.tag` take precedence over the page series and the default
 * 'CrunchyRoll' tag.
 *
 * The substitution is done in one pass with a replacer function, so values
 * are inserted verbatim: a '$' in a title is not interpreted as a replacement
 * pattern and a token appearing inside a value is not expanded again.
 */
function generateName(config: IConfig, page: IEpisodePage, extra = '')
{
  const episodeNum = parseInt(page.episode, 10);
  const volumeNum = parseInt(page.volume, 10);
  const episode = (episodeNum < 10 ? '0' : '') + page.episode;
  const volume = (volumeNum < 10 ? '0' : '') + page.volume;
  const tag = config.tag || 'CrunchyRoll';
  const series = config.series || page.series;

  const values: { [token: string]: string } = {
    EPISODE_ID: page.id.toString(),
    EPISODE_NUMBER: episode,
    EPISODE_TITLE: page.title,
    SEASON_NUMBER: volume,
    SEASON_TITLE: page.season,
    SERIES_TITLE: series,
    TAG: tag,
    VOLUME_NUMBER: volume,
    VOLUME_TITLE: page.season,
  };

  return config.nametmpl.replace(/{([A-Z_]+)}/g, (placeholder: string, token: string) =>
  {
    if (!Object.prototype.hasOwnProperty.call(values, token))
    {
      /* Not one of ours: keep the placeholder for later processing */
      return placeholder;
    }

    return String(values[token]);
  }) + extra;
}

/**
 * Left-pads a value with zeros.
 *
 * The value is converted to a string when needed and '0' characters are put
 * in front of it until it is at least `length` characters long. Values that
 * are already long enough are returned unchanged.
 */
function prefix(value: number|string, length: number)
{
  let padded = (typeof value === 'string') ? value : String(value);

  for (let missing = length - padded.length; missing > 0; missing--)
  {
    padded = '0' + padded;
  }

  return padded;
}

/**
 * Requests the page data and scrapes the id, episode, series and swf.
 *
 * The numeric episode id is taken from the end of `address`; an address
 * without one is rejected with 'Invalid address.'.
 *
 * Two page layouts are handled:
 *  - pages containing the '#vilos-iframe-container' element: the season
 *    information comes from the 'application/ld+json' metadata and the third
 *    script of the video box is handed to `vlos.getMedia`;
 *  - any other page: the series, volume and episode are extracted from the
 *    page text. When that fails a placeholder page with episode and volume
 *    '0' is returned after a warning.
 *
 * Anything thrown while scraping (missing element, malformed JSON, ...) is
 * forwarded to `done` instead of escaping from the request callback.
 */
function scrapePage(config: IConfig, address: string, done: (err: Error, page?: IEpisodePage) => void)
{
  const epId = parseInt((address.match(/[0-9]+$/) || ['0'])[0], 10);

  if (!epId)
  {
    return done(new Error('Invalid address.'));
  }

  my_request.get(config, address, (err, result) =>
  {
    if (err)
    {
      return done(err);
    }

    let page: IEpisodePage;

    try
    {
      const $ = cheerio.load(result);
      /* First check if we have the new player */
      const vlosScript = $('#vilos-iframe-container');

      /* A selection is an object even when nothing matched: test its length */
      if (vlosScript.length > 0)
      {
        const pageMetadata = JSON.parse($('script[type="application/ld+json"]')[0].children[0].data);
        const divScript = $('div[id="showmedia_video_box_wide"]');
        const scripts = divScript.find('script').toArray();
        const script = scripts[2].children[0].data;
        let seasonNumber = '1';
        let seasonTitle = '';

        if (pageMetadata.partOfSeason)
        {
          seasonNumber = pageMetadata.partOfSeason.seasonNumber;
          if (seasonNumber === '0') { seasonNumber = '1'; }

          seasonTitle = pageMetadata.partOfSeason.name;
        }

        page = vlos.getMedia(script, seasonTitle, seasonNumber);
      }
      else
      {
        /* Use the old way */
        const swf = /^([^?]+)/.exec($('link[rel=video_src]').attr('href'));
        const regexp = /\s*([^\n\r\t\f]+)\n?\s*[^0-9]*([0-9][\-0-9.]*)?,?\n?\s\s*[^0-9]*((PV )?[S0-9][P0-9.]*[a-fA-F]?)/;
        const seasonTitle = $('span[itemprop="title"]').text();
        const look = $('#showmedia_about_media').text();
        const episodeTitle = $('#showmedia_about_name').text().replace(/[“”]/g, '');
        const data = regexp.exec(look);

        if (config.debug)
        {
          log.dumpToDebug('episode page', $.html());
        }

        if (!swf || !data)
        {
          log.warn('Somethig unexpected in the page at ' + address + ' (data are: ' + look + ')');
          log.warn('Setting Season to ’0’ and episode to ’0’...');

          if (config.debug)
          {
            log.dumpToDebug('episode unexpected', look);
          }

          page = {
            episode: '0',
            id: epId,
            series: seasonTitle,
            season: seasonTitle,
            title: episodeTitle,
            /* This branch is also taken when the swf link was not found */
            swf: swf ? swf[1] : '',
            volume: '0',
            filename: '',
            media: null,
          };
        }
        else
        {
          page = {
            episode: data[3],
            id: epId,
            series: data[1],
            season: seasonTitle,
            title: episodeTitle,
            swf: swf[1],
            volume: data[2] || '1',
            filename: '',
            media: null,
          };
        }
      }
    }
    catch (scrapeError)
    {
      if (config.debug)
      {
        log.dumpToDebug('episode scrape', scrapeError);
      }

      return done(scrapeError);
    }

    /* Delivered outside of the try block so that an exception thrown by the
       callback itself is not mistaken for a scraping error */
    done(null, page);
  });
}

/**
 * Requests the player data and scrapes the subtitle and video data.
 *
 * The player configuration is requested from the
 * 'RpcApiVideoPlayer_GetStandardConfig' endpoint of the host found in
 * `address` and parsed as XML. The stream mode is 'HLS' when the stream has
 * an empty host and 'RTMP' otherwise; `subtitle` is null when the
 * configuration carries no subtitle.
 *
 * `done` is called exactly once: with an error when the address has no
 * scheme and host, when the request or the XML parsing fails or when the
 * configuration does not have the expected shape, and with the player
 * otherwise.
 */
function scrapePlayer(config: IConfig, address: string, id: number, done: (err: Error, player?: IEpisodePlayer) => void)
{
  const url = address.match(/^(https?:\/\/[^\/]+)/);

  if (!url)
  {
    return done(new Error('Invalid address.'));
  }

  const postForm = {
    current_page: address,
    video_format: config.video_format,
    video_quality: config.video_quality,
    media_id: id
  };

  my_request.post(config, url[1] + '/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=' + id, postForm,
    (err, result) =>
  {
    if (err)
    {
      return done(err);
    }

    xml2js.parseString(result, {
      explicitArray: false,
      explicitRoot: false,
    }, (errPS: Error, player: IEpisodePlayerConfig) =>
    {
      if (errPS)
      {
        return done(errPS);
      }

      let scraped: IEpisodePlayer;

      try
      {
        const preload = player['default:preload'];
        const isSubtitled = Boolean(preload.subtitle);
        let streamMode = 'RTMP';

        if (preload.stream_info.host === '')
        {
          streamMode = 'HLS';
        }

        scraped = {
          subtitle: isSubtitled ? {
            data: preload.subtitle.data,
            id: parseInt(preload.subtitle.$.id, 10),
            iv: preload.subtitle.iv,
          } : null,
          video: {
            file: preload.stream_info.file,
            host: preload.stream_info.host,
            mode: streamMode,
          },
        };
      }
      catch (parseError)
      {
        if (config.debug)
        {
          log.dumpToDebug('player scrape', parseError);
        }

        return done(parseError);
      }

      /* Delivered outside of the try block: an exception thrown by the
         callback must not trigger a second call to it */
      done(null, scraped);
    });
  });
}