
View on GitHub


0 mins
Test Coverage
import { promises as fs } from 'fs';
import axios from 'axios';
import url from 'url';
import path from 'path';
import cheerio from 'cheerio';
import debug from 'debug';
import isImage from 'is-image';
import Listr from 'listr';
import { words, keys, has } from 'lodash';

// One `debug` logger per namespace; the namespace strings (including the
// trailing space) are passed to `debug` unchanged.
const [logExtract, logRequest, logWrite] = [
  'page-loader: extract ',
  'page-loader: request ',
  'page-loader: download ',
].map(namespace => debug(namespace));

/**
 * Builds a one-line, human-readable description of a failure.
 *
 * The first descriptor whose `check` accepts the error decides the label:
 * an error that owns a `path` property is reported as a file-system error,
 * anything else falls through to the catch-all "Network" descriptor.
 *
 * @param {Error} error - the caught error.
 * @returns {string} `<type> error: trouble occured with <subject> --> <message>`.
 */
const handleError = (error) => {
  const errorTypes = [
    {
      type: 'File system',
      check: err => has(err, 'path'),
      data: err => err.path,
    },
    {
      type: 'Network',
      // Catch-all: must stay last so `find` below always yields a descriptor.
      check: () => true,
      // Not every error reaching this branch carries a request `config`;
      // guard the lookup so the handler never throws and masks the real error.
      data: err => (err.config && err.config.url) || 'unknown resource',
    },
  ];

  const { type, data } = errorTypes.find(({ check }) => check(error));
  return `${type} error: trouble occured with ${data(error)} --> ${error.message}`;
};

// True for every value except `undefined` (so `null` and '' still count as present).
const exists = link => typeof link !== 'undefined';

/**
 * Tells whether a link points to a resource on the page's own site, i.e. it
 * names neither a host nor a scheme of its own.
 *
 * @param {string} link - attribute value taken from the page.
 * @returns {boolean} true for relative / root-relative links.
 */
const isLocal = (link) => {
  // Third argument (slashesDenoteHost) makes `//cdn.example.com/x.js` parse
  // with a host instead of being mistaken for a local path.
  const { host, protocol } = url.parse(link, false, true);
  // Host-less schemes such as `data:` or `mailto:` are not downloadable
  // site resources either, so a protocol alone also disqualifies the link.
  return !host && !protocol;
};

/**
 * Per-content-type splitters. Each takes an address and returns a
 * `[pathString, postfix]` pair: the part of the address a name is derived
 * from, and the suffix to append to that name.
 */
const contentTypes = {
  // Main page: host + pathname of the URL, saved with an `.html` suffix.
  main: (address) => {
    const { host, pathname } = url.parse(address);
    return [`${host}${pathname}`, '.html'];
  },
  // Resource directory: same base as the main page, `_files` suffix.
  directory: (address) => {
    const [pathString] = contentTypes.main(address);
    return [pathString, '_files'];
  },
  // Single resource: path without its extension, extension kept as suffix.
  resource: (address) => {
    const { dir, name, ext } = path.parse(address);
    return [path.join(dir, name), ext];
  },
};

/**
 * Derives a flat file/directory name for an address.
 *
 * The address is split by the `contentTypes` entry for `type`; every run of
 * characters other than `.` and `/` in the path part becomes a word, the
 * words are joined with `-`, and the postfix is appended.
 *
 * @param {string} address - URL or resource path.
 * @param {string} type - a key of `contentTypes` ('main', 'directory', 'resource').
 * @returns {string} the generated name.
 */
const getContentName = (address, type) => {
  const [pathString, postfix] = contentTypes[type](address);
  return words(pathString, /[^./]+/g).join('-').concat(postfix);
};

/**
 * Tags whose resources are collected, keyed by tag name. For each tag:
 * `attribute` is the attribute holding the resource link, and
 * `responseType(filepath)` returns the response type to request it with.
 */
const tagsProperties = {
  script: { attribute: 'src', responseType: () => 'text' },
  img: { attribute: 'src', responseType: () => 'arraybuffer' },
  // <link> may reference either an image (binary) or something textual.
  link: { attribute: 'href', responseType: filepath => (isImage(filepath) ? 'arraybuffer' : 'text') },
};

/**
 * Saves the page at `address` into `dirpath`, together with its local
 * resources.
 *
 * Steps: check that `dirpath` is accessible, fetch the page, rewrite the
 * link attribute of every local `script` / `img` / `link` tag to point into
 * the resource directory, create that directory, download the collected
 * resources as a concurrent Listr task list, then write the rewritten page.
 *
 * @param {string} address - URL of the page to load.
 * @param {string} dirpath - existing directory to write into.
 * @returns {Promise<void>} resolves once the main file has been written.
 * @throws {Error} any failure is rethrown as an Error whose message is
 *   produced by `handleError`.
 */
export default async (address, dirpath) => {
  const resourceDirectoryName = getContentName(address, 'directory');
  const mainFileName = getContentName(address, 'main');
  const mainFilePath = path.join(dirpath, mainFileName);

  try {
    // Rejects early when the output directory is missing or inaccessible.
    await fs.access(dirpath);

    const response = await axios.get(address);

    // NOTE(review): the first argument was missing in the source as received;
    // `response.data` is the only value in scope that fits — confirm.
    const $ = cheerio.load(response.data, { decodeEntities: false });

    const links = [];

    const tags = keys(tagsProperties);
    tags.forEach((tag) => {
      const { attribute } = tagsProperties[tag];
      $(tag).each((i, element) => {
        const link = $(element).attr(attribute);
        if (exists(link) && isLocal(link)) {
          // Remember the original link for downloading, then point the tag
          // at the path the resource will be saved under.
          links.push({ link, tag });
          const resourceName = getContentName(link, 'resource');
          const resourcePath = path.join(resourceDirectoryName, resourceName);
          $(element).attr(attribute, resourcePath);
        }
      });
    });

    const modifiedMainFile = $.html();

    await fs.mkdir(path.join(dirpath, resourceDirectoryName));

    // exitOnError: false — a failing resource does not stop the other tasks.
    const resourceTasks = new Listr([], { exitOnError: false, concurrent: true });

    links.forEach(({ link, tag }) => {
      const resourceName = getContentName(link, 'resource');
      const resourcePath = path.join(dirpath, resourceDirectoryName, resourceName);
      const { responseType } = tagsProperties[tag];

      // NOTE(review): the `resourceTasks.add({` wrapper and the written
      // payload (`resourceResponse.data`) were missing in the source as
      // received and are reconstructed from the surrounding names — confirm.
      resourceTasks.add({
        title: url.resolve(address, link),
        task: () => axios({
          method: 'get',
          responseType: responseType(link),
          url: url.resolve(address, link),
        }).then((resourceResponse) => {
          logWrite(`resource ${link} to ${resourcePath}`);
          return fs.writeFile(resourcePath, resourceResponse.data);
        }),
      });
    });

    // NOTE(review): reconstructed — without running the list no resource is
    // ever downloaded. Confirm how a rejected run was meant to be handled.
    await resourceTasks.run();

    logWrite(`resource ${address} to ${mainFilePath}`);
    return fs.writeFile(mainFilePath, modifiedMainFile);
  } catch (error) {
    const message = handleError(error);
    throw new Error(message);
  }
};