TryGhost/Ghost

View on GitHub
ghost/core/core/server/data/importer/import-manager.js

Summary

Maintainability
D
2 days
Test Coverage
const _ = require('lodash');
const fs = require('fs-extra');
const path = require('path');
const os = require('os');
const glob = require('glob');
const uuid = require('uuid');
const config = require('../../../shared/config');
const {extract} = require('@tryghost/zip');
const tpl = require('@tryghost/tpl');
const debug = require('@tryghost/debug')('import-manager');
const logging = require('@tryghost/logging');
const errors = require('@tryghost/errors');
const ImageHandler = require('./handlers/image');
const ImporterContentFileHandler = require('@tryghost/importer-handler-content-files');
const RevueHandler = require('./handlers/revue');
const JSONHandler = require('./handlers/json');
const MarkdownHandler = require('./handlers/markdown');
const ContentFileImporter = require('./importers/ContentFileImporter');
const RevueImporter = require('@tryghost/importer-revue');
const DataImporter = require('./importers/data');
const urlUtils = require('../../../shared/url-utils');
const {GhostMailer} = require('../../services/mail');
const jobManager = require('../../services/jobs');
const mediaStorage = require('../../adapters/storage').getStorage('media');
const imageStorage = require('../../adapters/storage').getStorage('images');
const fileStorage = require('../../adapters/storage').getStorage('files');

const emailTemplate = require('./email-template');
const ghostMailer = new GhostMailer();

// Message templates used by the import manager. Every entry is passed through tpl()
// at the point where it is used (error messages, contexts and help texts).
const messages = {
    // Logged (not thrown) when the temporary import file/directory cannot be removed
    couldNotCleanUpFile: {
        error: 'Import could not clean up file ',
        context: 'Your site will continue to work as expected'
    },
    // Zip validation / processing failures
    noContentToImport: 'Zip did not include any content to import.',
    invalidZipStructure: 'Invalid zip file structure.',
    invalidZipFileBaseDirectory: 'Invalid zip file: base directory read failed',
    zipContainsMultipleDataFormats: 'Zip file contains multiple data formats. Please split up and import separately.',
    // Zip extraction failures (also reused as the generic "unreadable zip" message)
    invalidZipFileNameEncoding: 'The uploaded zip could not be read',
    invalidZipFileNameEncodingContext: 'The filename was too long or contained invalid characters',
    invalidZipFileNameEncodingHelp: 'Remove any special characters from the file name, or alternatively try another archiving tool if using MacOS Archive Utility'
};

// Glob levels: how deep inside the extracted zip a generated glob pattern may match
const ROOT_ONLY = 0; // entries directly in the root
const ROOT_OR_SINGLE_DIR = 1; // entries in the root or exactly one directory down
const ALL_DIRS = 2; // entries at any depth

// Zip-specific extensions, content types and directories that are merged with the
// values contributed by the registered handlers. Never reassigned, hence `const`.
const defaults = {
    extensions: ['.zip'],
    contentTypes: ['application/zip', 'application/x-zip-compressed'],
    directories: []
};

/**
 * Coordinates a content import.
 *
 * A file (or zip of files) is loaded through the registered handlers into an importData object,
 * the importData is passed through the registered importers, temporary files are removed and
 * finally the outcome is emailed to the user who started the import.
 */
class ImportManager {
    constructor() {
        const uploads = config.get('uploads');

        const mediaHandler = new ImporterContentFileHandler({
            type: 'media',
            // @NOTE: making the second parameter strict folder "content/media" breaks the glob pattern
            //        in the importer, so we need to keep it as general "content" unless
            //        it becomes a strict requirement
            directories: ['media', 'content'],
            ignoreRootFolderFiles: true,
            extensions: uploads.media.extensions,
            contentTypes: uploads.media.contentTypes,
            contentPath: config.getContentPath('media'),
            urlUtils: urlUtils,
            storage: mediaStorage
        });

        const filesHandler = new ImporterContentFileHandler({
            type: 'files',
            // @NOTE: making the second parameter strict folder "content/files" breaks the glob pattern
            //        in the importer, so we need to keep it as general "content" unless
            //        it becomes a strict requirement
            directories: ['files', 'content'],
            ignoreRootFolderFiles: true,
            extensions: uploads.files.extensions,
            contentTypes: uploads.files.contentTypes,
            contentPath: config.getContentPath('files'),
            urlUtils: urlUtils,
            storage: fileStorage
        });

        const imageImporter = new ContentFileImporter({
            type: 'images',
            store: imageStorage
        });
        const mediaImporter = new ContentFileImporter({
            type: 'media',
            store: mediaStorage
        });

        const contentFilesImporter = new ContentFileImporter({
            type: 'files',
            store: fileStorage
        });

        /**
         * Importers are run in this order by preProcess() and doImport()
         * @type {Importer[]} importers
         */
        this.importers = [imageImporter, mediaImporter, contentFilesImporter, RevueImporter, DataImporter];

        /**
         * Handlers are consulted in this order by processZip() and processFile()
         * @type {Handler[]}
         */
        this.handlers = [ImageHandler, mediaHandler, filesHandler, RevueHandler, JSONHandler, MarkdownHandler];

        /**
         * Path of the temporary directory created by extractZip(), removed again by cleanUp()
         * @type {?string}
         */
        this.fileToDelete = null;
    }

    /**
     * Get an array of all the file extensions for which we have handlers (plus the zip defaults)
     * @returns {string[]}
     */
    getExtensions() {
        return _.union(_.flatMap(this.handlers, 'extensions'), defaults.extensions);
    }

    /**
     * Get an array of all the mime types for which we have handlers (plus the zip defaults)
     * @returns {string[]}
     */
    getContentTypes() {
        return _.union(_.flatMap(this.handlers, 'contentTypes'), defaults.contentTypes);
    }

    /**
     * Get an array of directories for which we have handlers (plus the zip defaults)
     * @returns {string[]}
     */
    getDirectories() {
        return _.union(_.flatMap(this.handlers, 'directories'), defaults.directories);
    }

    /**
     * Convert items into a glob alternation, e.g. ['.json', '.md'] => '+(.json|.md)'
     * @param {String[]} items
     * @returns {String}
     */
    getGlobPattern(items) {
        let alternatives = '';

        for (const item of items ?? []) {
            // Only insert a separator once something has been collected
            alternatives = alternatives !== '' ? `${alternatives}|${item}` : item;
        }

        return `+(${alternatives})`;
    }

    /**
     * Build a glob matching files with one of the given extensions at the given depth
     * @param {String[]} extensions
     * @param {Number} [level] one of ROOT_ONLY, ROOT_OR_SINGLE_DIR or ALL_DIRS; anything else behaves like ROOT_ONLY
     * @returns {String}
     */
    getExtensionGlob(extensions, level) {
        let prefix = '*';

        if (level === ALL_DIRS) {
            prefix = '**/*';
        } else if (level === ROOT_OR_SINGLE_DIR) {
            prefix = '{*/*,*}';
        }

        return prefix + this.getGlobPattern(extensions);
    }

    /**
     * Build a glob matching one of the given directory names at the given depth
     * @param {String[]} directories
     * @param {Number} [level] one of ROOT_ONLY, ROOT_OR_SINGLE_DIR or ALL_DIRS; anything else behaves like ROOT_ONLY
     * @returns {String}
     */
    getDirectoryGlob(directories, level) {
        let prefix = '';

        if (level === ALL_DIRS) {
            prefix = '**/';
        } else if (level === ROOT_OR_SINGLE_DIR) {
            prefix = '{*/,}';
        }

        return prefix + this.getGlobPattern(directories);
    }

    /**
     * Return true if the given file extension is a zip extension
     * @param {string} ext extension including the leading dot; compared as-is against the defaults
     * @returns {boolean}
     */
    isZip(ext) {
        return defaults.extensions.includes(ext);
    }

    /**
     * Checks the content of a zip folder to see if it is valid.
     * Importable content includes any files or directories which the handlers can process
     * Importable content must be found either in the root, or inside one base directory
     *
     * @param {String} directory path of the extracted zip
     * @returns {boolean} true when the structure is valid; invalid structures throw instead of returning false
     * @throws {errors.UnsupportedMediaTypeError} when nothing importable is found, or only in unsupported locations
     */
    isValidZip(directory) {
        const extensions = this.getExtensions();

        // Globs match content in the root or inside a single directory
        const extMatchesBase = glob.sync(
            this.getExtensionGlob(extensions, ROOT_OR_SINGLE_DIR), {cwd: directory, nocase: true}
        );

        const extMatchesAll = glob.sync(
            this.getExtensionGlob(extensions, ALL_DIRS), {cwd: directory, nocase: true}
        );

        const dirMatches = glob.sync(
            this.getDirectoryGlob(this.getDirectories(), ROOT_OR_SINGLE_DIR), {cwd: directory}
        );

        // If this folder contains importable files or a known handler directory with importable files below it
        if (extMatchesBase.length > 0 || (dirMatches.length > 0 && extMatchesAll.length > 0)) {
            return true;
        }

        if (extMatchesAll.length < 1) {
            throw new errors.UnsupportedMediaTypeError({message: tpl(messages.noContentToImport)});
        }

        throw new errors.UnsupportedMediaTypeError({message: tpl(messages.invalidZipStructure)});
    }

    /**
     * Use the extract module to extract the given zip file to a temp directory & return the temp directory path
     * @param {string} filePath
     * @returns {Promise<string>} full path to the extracted folder
     * @throws {errors.UnsupportedMediaTypeError} for the known "unreadable zip" failures; other errors are rethrown as-is
     */
    async extractZip(filePath) {
        const tmpDir = path.join(os.tmpdir(), uuid.v4());
        // Recorded before extracting so cleanUp() also removes a partially extracted directory
        this.fileToDelete = tmpDir;

        try {
            await extract(filePath, tmpDir);
        } catch (err) {
            // Guard against errors without a string message so the original error is never masked
            const reason = typeof err?.message === 'string' ? err.message : '';

            if (reason.startsWith('ENAMETOOLONG:')) {
                // The file was probably zipped with MacOS zip utility. Which doesn't correctly set UTF-8 encoding flag.
                // This causes ENAMETOOLONG error on Linux, because the resulting filename length is too long when decoded using the default string encoder.
                throw new errors.UnsupportedMediaTypeError({
                    message: tpl(messages.invalidZipFileNameEncoding),
                    context: tpl(messages.invalidZipFileNameEncodingContext),
                    help: tpl(messages.invalidZipFileNameEncodingHelp),
                    code: 'INVALID_ZIP_FILE_NAME_ENCODING'
                });
            }

            if (
                reason.includes('end of central directory record signature not found')
                || reason.includes('invalid comment length')
            ) { // This comes from Yauzl when the zip is invalid
                throw new errors.UnsupportedMediaTypeError({
                    message: tpl(messages.invalidZipFileNameEncoding),
                    code: 'INVALID_ZIP_FILE'
                });
            }

            throw err;
        }

        return tmpDir;
    }

    /**
     * Use the handler extensions to get a globbing pattern, then use that to fetch all the files from the zip which
     * are relevant to the given handler, and return them as a name and path combo
     * @param {Object} handler
     * @param {String} directory
     * @returns {File[]} Files, where name is relative to directory and path is name joined onto directory
     */
    getFilesFromZip(handler, directory) {
        const globPattern = this.getExtensionGlob(handler.extensions, ALL_DIRS);
        const matches = glob.sync(globPattern, {cwd: directory, nocase: true});

        return matches.map(file => ({name: file, path: path.join(directory, file)}));
    }

    /**
     * Get the name of the single base directory if there is one
     * @param {String} directory
     * @returns {String|undefined} the base directory name, or undefined when importable content sits in the root
     * @throws {errors.ValidationError} when no importable file can be found at any depth
     */
    getBaseDirectory(directory) {
        const extensions = this.getExtensions();

        // Globs match root level only
        const extMatches = glob.sync(this.getExtensionGlob(extensions, ROOT_ONLY), {cwd: directory, nocase: true});
        const dirMatches = glob.sync(this.getDirectoryGlob(this.getDirectories(), ROOT_ONLY), {cwd: directory, nocase: true});

        // There is no base directory
        if (extMatches.length > 0 || dirMatches.length > 0) {
            return undefined;
        }

        // There is a base directory, grab it from any ext match
        const extMatchesAll = glob.sync(
            this.getExtensionGlob(extensions, ALL_DIRS), {cwd: directory, nocase: true}
        );

        if (extMatchesAll.length < 1) {
            throw new errors.ValidationError({message: tpl(messages.invalidZipFileBaseDirectory)});
        }

        // First path segment of the first match
        return extMatchesAll[0].split('/')[0];
    }

    /**
     * Process Zip
     * Takes a reference to a zip file, extracts it, sends any relevant files from inside to the right handler, and
     * returns an object in the importData format: {data: {}, images: []}
     * The data key contains JSON representing any data that should be imported
     * The image key contains references to images that will be stored (and where they will be stored)
     * @param {File} file
     * @returns {Promise<ImportData>} keyed by the type of each handler that found files
     * @throws {errors.UnsupportedMediaTypeError} when two handlers of the same type both find files, or nothing is loaded
     */
    async processZip(file) {
        const zipDirectory = await this.extractZip(file.path);

        /**
         * @type {ImportData}
         */
        const importData = {};

        // Throws when the structure is not importable
        this.isValidZip(zipDirectory);
        const baseDir = this.getBaseDirectory(zipDirectory);

        // Handlers are run one after another, in registration order
        for (const handler of this.handlers) {
            const files = this.getFilesFromZip(handler, zipDirectory);

            debug('handler', handler.type, files);

            if (files.length === 0) {
                continue;
            }

            if (Object.prototype.hasOwnProperty.call(importData, handler.type)) {
                // This limitation is here to reduce the complexity of the importer for now
                throw new errors.UnsupportedMediaTypeError({
                    message: tpl(messages.zipContainsMultipleDataFormats)
                });
            }

            importData[handler.type] = await handler.loadFile(files, baseDir);
        }

        if (Object.keys(importData).length === 0) {
            throw new errors.UnsupportedMediaTypeError({
                message: tpl(messages.noContentToImport)
            });
        }

        return importData;
    }

    /**
     * Process File
     * Takes a reference to a single file, sends it to the relevant handler to be loaded and returns an object in the
     * importData format: {data: {}, images: []}
     * The data key contains JSON representing any data that should be imported
     * The image key contains references to images that will be stored (and where they will be stored)
     * @param {File} file
     * @param {string} ext extension of the file, matched as-is against each handler's extensions
     * @returns {Promise<ImportData>} keyed by the type of each matching handler; empty when no handler matches
     */
    async processFile(file, ext) {
        const fileHandlers = this.handlers.filter((handler) => {
            if (!_.includes(handler.extensions, ext)) {
                return false;
            }

            // CASE: content file handlers should ignore files in the root directory
            if (handler.directories && handler.directories.length) {
                // Second '/'-separated segment of the file's directory path
                // NOTE(review): splitting on '/' assumes POSIX-style paths - confirm for other platforms
                const dir = path.dirname(file.path).split('/')[1];
                return _.includes(handler.directories, dir);
            }

            return true;
        });

        const importData = {};

        // Matching handlers load the file concurrently; each writes to its own key
        await Promise.all(fileHandlers.map(async (fileHandler) => {
            debug('fileHandler', fileHandler.type);
            importData[fileHandler.type] = await fileHandler.loadFile([_.pick(file, 'name', 'path')]);
        }));

        return importData;
    }

    /**
     * Import Step 1:
     * Load the given file into usable importData in the format: {data: {}, images: []}, regardless of
     * whether the file is a single importable file like a JSON file, or a zip file containing loads of files.
     * @param {File} file
     * @returns {Promise<ImportData>}
     */
    loadFile(file) {
        // The extension is lower-cased before being compared with the zip defaults / handler extensions
        const ext = path.extname(file.name).toLowerCase();

        return this.isZip(ext) ? this.processZip(file) : this.processFile(file, ext);
    }

    /**
     * Import Step 2:
     * Pass the prepared importData through the preProcess function of the various importers, so that the importers can
     * make any adjustments to the data based on relationships between it
     * @param {ImportData} importData
     * @returns {Promise<ImportData>}
     */
    async preProcess(importData) {
        debug('preProcess');

        // Each importer receives the output of the previous one
        let processed = importData;
        for (const importer of this.importers) {
            processed = importer.preProcess(processed);
        }

        return processed;
    }

    /**
     * Import Step 3:
     * Each importer gets passed the data from importData which has the key matching its type - i.e. it only gets the
     * data that it should import. Each importer then handles actually importing that data into Ghost
     * @param {ImportData} importData
     * @param {ImportOptions} [importOptions] to allow override of certain import features such as locking a user
     * @returns {Promise<Object.<string, ImportResult>>} importResults keyed by importer type
     */
    async doImport(importData, importOptions) {
        debug('doImport', this.importers);
        const options = importOptions || {};
        const importResults = {};

        // Importers run sequentially, in registration order
        for (const importer of this.importers) {
            debug('importer looking for', importer.type, 'in', Object.keys(importData));
            if (Object.prototype.hasOwnProperty.call(importData, importer.type)) {
                importResults[importer.type] = await importer.doImport(importData[importer.type], options);
            }
        }

        return importResults;
    }

    /**
     * Import Step 4:
     * Report on what was imported, currently a no-op
     * @param {Object.<string, ImportResult>} importResults
     * @returns {Promise<Object.<string, ImportResult>>} importResults, unchanged
     */
    async generateReport(importResults) {
        return importResults;
    }

    /**
     * Step 5:
     * Remove files after we're done (abstracted into a function for easier testing)
     * A failure to remove the file is logged, not thrown.
     * @returns {Promise<void>}
     */
    async cleanUp() {
        if (this.fileToDelete === null) {
            return;
        }

        try {
            await fs.remove(this.fileToDelete);
        } catch (err) {
            logging.error(new errors.InternalServerError({
                err: err,
                // The message is a prefix, so include the path that could not be removed
                context: `${tpl(messages.couldNotCleanUpFile.error)}${this.fileToDelete}`,
                help: tpl(messages.couldNotCleanUpFile.context)
            }));
        }

        // Reset even when removal failed so the same path is not retried
        this.fileToDelete = null;
    }

    /**
     * Import Step 6:
     * Create an email to notify the user that the import has completed
     * @param {ImportResult} result
     * @param {Object} options
     * @param {string} options.emailRecipient
     * @param {string} options.importTag name of the tag to filter the linked posts list by, when it was imported
     * @returns {string} whatever the email template renders
     */
    generateCompletionEmail(result, {
        emailRecipient,
        importTag
    }) {
        const siteUrl = new URL(urlUtils.urlFor('home', null, true));
        const postsUrl = new URL('posts', urlUtils.urlFor('admin', null, true));

        if (importTag && result?.data?.tags) {
            // The import tag is not guaranteed to be among the imported tags, so only filter when it is found
            const tag = result.data.tags.find(t => t.name === importTag);
            if (tag) {
                postsUrl.searchParams.set('tag', tag.slug);
            }
        }

        return emailTemplate({
            result,
            siteUrl,
            postsUrl,
            emailRecipient
        });
    }

    /**
     * Import From File
     * The main method of the ImportManager, call this to kick everything off!
     *
     * Outside of testing environments the first call only loads the file and then re-enters this method through
     * the job manager with the loaded data; the actual import, clean up and completion email happen in that job.
     * When the import itself fails, the error is logged and reported by email and the returned promise resolves
     * with undefined.
     *
     * @param {File} file
     * @param {ImportOptions} importOptions to allow override of certain import features such as locking a user
     * @returns {Promise<Object.<string, ImportResult>>}
     */
    async importFromFile(file, importOptions = {}) {
        let importData;
        if (importOptions.data) {
            importData = importOptions.data;
        } else {
            // Step 1: Handle converting the file to usable data
            // Has to be completed outside of job to ensure file is processed before being deleted
            try {
                importData = await this.loadFile(file);
            } catch (err) {
                // A zip may already have been extracted to a temp directory; don't leave it behind
                await this.cleanUp();
                throw err;
            }
        }

        debug('importFromFile completed file load', importData);

        const env = config.get('env');
        const isTesting = Boolean(env?.startsWith('testing'));

        if (!isTesting && !importOptions.runningInJob) {
            return jobManager.addJob({
                job: () => this.importFromFile(file, {
                    ...importOptions,
                    runningInJob: true,
                    data: importData
                }),
                offloaded: false
            });
        }

        let importResult;
        try {
            // Step 2: Let the importers pre-process the data
            importData = await this.preProcess(importData);

            // Step 3: Actually do the import
            // @TODO: It would be cool to have some sort of dry run flag here
            importResult = await this.doImport(importData, importOptions);

            // Step 4: Report on the import
            importResult = await this.generateReport(importResult);

            return importResult;
        } catch (err) {
            logging.error(err, 'Content import was unsuccessful');
            // Shape picked up below to choose the "unsuccessful" email subject
            importResult = {data: {errors: [err]}};
        } finally {
            // Step 5: Cleanup any files
            await this.cleanUp();

            if (!isTesting) {
                // Step 6: Send email
                // `user` is optional in ImportOptions; without a recipient there is nobody to notify
                const emailRecipient = importOptions.user?.email;

                if (emailRecipient) {
                    const email = this.generateCompletionEmail(importResult, {
                        emailRecipient,
                        importTag: importOptions.importTag
                    });
                    await ghostMailer.send({
                        to: emailRecipient,
                        subject: importResult?.data?.errors
                            ? 'Your content import was unsuccessful'
                            : 'Your content import has finished',
                        html: email
                    });
                } else {
                    logging.warn('Content import completion email was not sent: no recipient email provided');
                }
            }
        }
    }
}

/**
 * @typedef {object} ImportOptions
 * @property {boolean} [runningInJob]
 * @property {boolean} [returnImportedData]
 * @property {boolean} [importPersistUser]
 * @property {Object} [user]
 * @property {string} [user.email]
 * @property {string} [importTag]
 * @property {Object} [data]
 */

/**
 * @typedef {object} Importer
 * @property {"images"|"data"} type
 * @property {PreProcessMethod} preProcess
 * @property {DoImportMethod} doImport
 */

/**
 * @callback PreProcessMethod
 * @param {ImportData} importData
 * @returns {ImportData}
 */

/**
 * @callback DoImportMethod
 * @param {object|object[]} importData
 * @param {ImportOptions} importOptions
 * @returns {Promise<ImportResult>} import result
 */

/**
 * @typedef {object} Handler
 * @property {"images"|"data"} type
 * @property {string[]} extensions
 * @property {string[]} contentTypes
 * @property {string[]} directories
 * @property {LoadFileMethod} loadFile
 */

/**
 * @callback LoadFileMethod
 * @param {File[]} files
 * @param {string} [baseDir]
 * @returns {Promise<object[]|object>} data
 */

/**
 * File object
 * @typedef {Object} File
 * @property {string} name
 * @property {string} path
 */

/**
 * @typedef {Object} ImportData
 * @property {Object} [data]
 * @property {Array} [images]
 */

/**
 * @typedef {Object} ImportResult
 */
// Export a ready-made ImportManager instance rather than the class itself
module.exports = new ImportManager();