Badasper/project-lvl3-s382

View on GitHub
src/index.js

Summary

Maintainability
A
0 mins
Test Coverage
import os from 'os';
import _ from 'lodash';
import cheerio from 'cheerio';
import { promises as fs } from 'fs';
import path from 'path';
import url from 'url';
import axios from 'axios';
import debug from 'debug';
import Listr from 'listr';

// Prefix shared by every debug namespace of this module.
const logName = 'page-loader:';

// One debug channel per concern; each namespace is `${logName} <label>:`.
const log = _.mapValues(
  { http: 'http', files: 'disk operation', info: 'info' },
  label => debug(`${logName} ${label}:`)
);

/**
 * Builds a flat name from a URL or path string.
 *
 * Leading and trailing slashes are trimmed, the extension is split off, every
 * character of the remaining part matched by `template` is replaced with `-`,
 * and a suffix is appended.
 *
 * @param {string} pathName - Path or URL to convert.
 * @param {string} [end] - Suffix used instead of the extension found in `pathName`.
 * @param {RegExp} [template] - Characters to replace with `-`; by default
 *   everything except ASCII letters and digits.
 * @returns {string} The generated name.
 */
const generateName = (pathName, end, template = /[^\w]|[_]/gi) => {
  const parsed = path.parse(_.trim(pathName, '/'));
  const flattened = path.join(parsed.dir, parsed.name).replace(template, '-');
  // An explicit `end` wins over the parsed extension; fall back to no suffix.
  const suffix = end || parsed.ext || '';
  return `${flattened}${suffix}`;
};

/**
 * Performs a GET request with axios and resolves with the response body.
 *
 * @param {string} [pageUrl=''] - URL to request.
 * @param {string} [responseType='arraybuffer'] - Value for the axios `responseType` option.
 * @returns {Promise<*>} Resolves with `response.data`; the request is written
 *   to the http debug channel once it has succeeded.
 */
const getDataFromUrl = (pageUrl = '', responseType = 'arraybuffer') => {
  const request = { method: 'get', url: pageUrl, responseType };
  return axios(request).then(({ data }) => {
    log.http('GET %s', pageUrl);
    return data;
  });
};

/**
 * Tells whether a link carries its own host.
 *
 * @param {string} link - Link to inspect.
 * @returns {string|null} The host parsed from the link (truthy), or `null`
 *   when the link has no host part.
 */
const isCdnLink = (link) => {
  const { host } = url.parse(link);
  return host;
};

/**
 * Rewrites asset references in an HTML document so that they point into the
 * local assets directory, and collects the original references.
 *
 * Only `href` of <link> and `src` of <script> and <img> are processed.
 * Elements whose attribute is missing or empty (for example inline <script>
 * blocks) are left untouched, so no bogus attribute is added to them.
 *
 * @param {*} data - HTML source handed to `cheerio.load`.
 * @param {string} assetsDirName - Directory name prepended to every rewritten link.
 * @returns {{links: string[], html: string}} The original links (all <link>
 *   elements first, then <script>, then <img>) and the serialized, modified HTML.
 */
const getLinksAndModifyHtml = (data, assetsDirName) => {
  const tags = {
    link: 'href',
    script: 'src',
    img: 'src',
  };
  const $ = cheerio.load(data);
  const links = [];
  Object.keys(tags).forEach((tag) => {
    const attrName = tags[tag];
    $(tag).each((idx, item) => {
      const link = $(item).attr(attrName);
      if (!link) {
        // Nothing to download here; rewriting would attach a fake reference
        // and, for inline scripts, stop the inline code from running.
        return;
      }
      const newLink = path.join(assetsDirName, generateName(link));
      $(item).attr(attrName, newLink);
      log.info('replace link %s by %s', link, newLink);
      links.push(link);
    });
  });
  return { links, html: $.html() };
};

/**
 * Runs the given Listr tasks with the `concurrent` option enabled.
 *
 * @param {Array<{title: string, task: Function}>} taskList - Listr task definitions.
 * @returns {Promise} The promise returned by `Listr#run`.
 */
const taskRunner = taskList => new Listr(taskList, { concurrent: true }).run();

/**
 * Writes the HTML to disk as a reported task and passes the links through.
 *
 * @param {string} htmlFilePath - Destination file for the HTML.
 * @param {string} html - HTML content to write (UTF-8).
 * @param {string[]} links - Value handed on unchanged to the next step.
 * @returns {Promise<string[]>} Resolves with `links` after the file is written.
 */
const saveHtmlAndPushLinks = (htmlFilePath, html, links) => {
  // The write starts immediately; the task below only exposes its promise.
  const writing = fs.writeFile(htmlFilePath, html, 'utf-8');
  const saveTask = { title: `save html: ${htmlFilePath}`, task: () => writing };
  return taskRunner([saveTask]).then(() => {
    log.files('save html %s', htmlFilePath);
    return links;
  });
};

/**
 * Creates the assets directory, then downloads every link into it.
 *
 * @param {string[]} links - Asset links as found in the page.
 * @param {string} host - Base URL used to resolve links that have no host of their own.
 * @param {string} assetsDirPath - Directory to create and to save the files into.
 * @returns {Promise} Resolves when the download task list has finished.
 */
const downloadAssets = (links, host, assetsDirPath) => {
  // Directory creation starts immediately; the task only exposes its promise.
  const dirCreation = fs.mkdir(assetsDirPath);
  const mkdirTask = { title: `mkdir: ${assetsDirPath}`, task: () => dirCreation };

  // Starts one download right away and wraps it into a task definition.
  const toDownloadTask = (link) => {
    const targetPath = path.resolve(assetsDirPath, generateName(link));
    const downloadLink = isCdnLink(link) ? link : url.resolve(host, link);
    const download = getDataFromUrl(downloadLink)
      .then(data => fs.writeFile(targetPath, data))
      .then(() => log.files('save file %s', targetPath));
    return { title: `download file: ${downloadLink}`, task: () => download };
  };

  return taskRunner([mkdirTask])
    .then(() => taskRunner(links.map(link => toDownloadTask(link))));
};

/**
 * Derives every path and name needed for a download from the user input.
 *
 * @param {string} [pageUrl=''] - URL of the page to download.
 * @param {string} [dirName=''] - Output directory; `os.tmpdir()` is used when empty.
 * @returns {{host: string, rootDir: string, assetsDir: string,
 *   assetsDirPath: string, indexFilePath: string}}
 *   `host` is the page origin (`protocol//host`, port included), `assetsDir`
 *   is the assets directory name, and the two `*Path` values are resolved
 *   against `rootDir`.
 */
const handleInputArgs = (pageUrl = '', dirName = '') => {
  const {
    host: hostWithPort,
    hostname,
    pathname,
    protocol,
  } = url.parse(pageUrl);
  // Names are built from `hostname` (no port) to keep generated names stable.
  const name = `${hostname}${pathname}`;
  const rootDir = dirName || os.tmpdir();
  const indexFileName = generateName(name, '.html');
  log.info('generate name for html file=%s', indexFileName);
  const assetsDir = generateName(name, '_files');
  log.info('generate name for assets dir=%s', assetsDir);
  const assetsDirPath = path.resolve(rootDir, assetsDir);
  // The parsed `host` keeps the port, unlike `hostname`; without it assets of
  // a page served from a non-default port would be requested from the wrong origin.
  const host = `${protocol}//${hostWithPort}`;
  const indexFilePath = path.resolve(rootDir, indexFileName);

  return {
    host,
    rootDir,
    assetsDir,
    assetsDirPath,
    indexFilePath,
  };
};

/**
 * Converts a low-level failure into a user-facing error and throws it.
 *
 * @param {Object} err - Error caught while loading a page.
 * @throws {Error} `Loading fail: ...` when `err.code` is a known code or when
 *   `err.response` is set (the message then carries `err.response.status`).
 * @throws {*} `err` itself in every other case.
 */
const handleAndThrowError = (err) => {
  const reasonByCode = {
    EEXIST: 'Web page is already downloaded, please choose another dir',
    ENOENT: 'Cannot find output dir, please check output dir --output dirToDownload',
    EACCES: 'Permission denied to download dir, please choose another dir',
    ENETUNREACH: 'Bad url for download',
    ECONNREFUSED: 'Please check url or connection',
  };
  const reason = reasonByCode[err.code];
  // Neither a known code nor an HTTP response: pass the error on untouched.
  if (!reason && !err.response) {
    throw err;
  }
  // A known code takes precedence over the HTTP status.
  const message = reason
    ? `Loading fail: ${reason}`
    : `Loading fail: Unreachable url: ${err.response.status}`;
  throw new Error(message);
};

/**
 * Downloads a page and its assets into a directory.
 *
 * Steps: check that the output directory is accessible, fetch the page,
 * rewrite its asset links, save the HTML, then download the assets.
 *
 * @param {string} pageUrl - URL of the page to download.
 * @param {string} [dirName] - Output directory (see `handleInputArgs` for the fallback).
 * @returns {Promise<string>} Resolves with a completion message; rejections
 *   go through `handleAndThrowError`.
 */
const loadPage = (pageUrl, dirName) => {
  const target = handleInputArgs(pageUrl, dirName);

  return fs.access(target.rootDir)
    .then(() => getDataFromUrl(pageUrl))
    .then(data => getLinksAndModifyHtml(data, target.assetsDir))
    .then(({ links, html }) => saveHtmlAndPushLinks(target.indexFilePath, html, links))
    .then(links => downloadAssets(links, target.host, target.assetsDirPath))
    .then(() => {
      log.info('All files downloded succsessfully!');
      return `Download is done ${pageUrl} to ${target.rootDir}`;
    })
    .catch(handleAndThrowError);
};

export default loadPage;